mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: Removal of regops from CUDA driver
The current CUDA drivers have been using regops to directly access GPU registers from user space through the dbg node. This is a security hole and needs to be avoided. This patch instead implements similar functionality in the kernel and provides an ioctl for it. Bug 200083334 Change-Id: Ic5ff5a215cbabe7a46837bc4e15efcceb0df0367 Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com> Reviewed-on: http://git-master/r/711758 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
cf0085ec23
commit
895675e1d5
@@ -20,9 +20,16 @@
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/nvgpu.h>
|
||||
#include <uapi/linux/nvgpu.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
#include "gk20a.h"
|
||||
#include "gr_gk20a.h"
|
||||
#include "fence_gk20a.h"
|
||||
#include "regops_gk20a.h"
|
||||
#include "hw_gr_gk20a.h"
|
||||
#include "hw_fb_gk20a.h"
|
||||
#include "hw_proj_gk20a.h"
|
||||
|
||||
|
||||
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Invalidate i-cache for kepler & maxwell */
|
||||
static int nvgpu_gpu_ioctl_inval_icache(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_inval_icache_args *args)
|
||||
{
|
||||
|
||||
int err = 0;
|
||||
u32 cache_ctrl, regval;
|
||||
struct channel_gk20a *ch;
|
||||
struct nvgpu_dbg_gpu_reg_op ops;
|
||||
|
||||
ch = gk20a_get_channel_from_file(args->channel_fd);
|
||||
|
||||
ops.op = REGOP(READ_32);
|
||||
ops.type = REGOP(TYPE_GR_CTX);
|
||||
ops.status = REGOP(STATUS_SUCCESS);
|
||||
ops.value_hi = 0;
|
||||
ops.and_n_mask_lo = 0;
|
||||
ops.and_n_mask_hi = 0;
|
||||
ops.offset = gr_pri_gpc0_gcc_dbg_r();
|
||||
|
||||
/* Take the global lock, since we'll be doing global regops */
|
||||
mutex_lock(&g->dbg_sessions_lock);
|
||||
|
||||
err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
|
||||
|
||||
regval = ops.value_lo;
|
||||
|
||||
if (!err) {
|
||||
ops.op = REGOP(WRITE_32);
|
||||
ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
|
||||
err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
|
||||
cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
|
||||
gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
|
||||
|
||||
end:
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_mmu_debug_mode_args *args)
|
||||
{
|
||||
int err = 0;
|
||||
u32 mmu_debug_ctrl;
|
||||
|
||||
err = gk20a_busy(g->dev);
|
||||
if (err) {
|
||||
gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mutex_lock(&g->dbg_sessions_lock);
|
||||
|
||||
if (args->state == 1) {
|
||||
mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
|
||||
g->mmu_debug_ctrl = true;
|
||||
} else {
|
||||
mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
|
||||
g->mmu_debug_ctrl = false;
|
||||
}
|
||||
|
||||
mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
|
||||
mmu_debug_ctrl = set_field(mmu_debug_ctrl, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
|
||||
gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
|
||||
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
gk20a_idle(g->dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gpu_ioctl_set_debug_mode(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_sm_debug_mode_args *args)
|
||||
{
|
||||
int gpc, tpc, err = 0;
|
||||
u32 sm_id, sm_dbgr_ctrl0;
|
||||
struct channel_gk20a *ch;
|
||||
struct nvgpu_dbg_gpu_reg_op ops;
|
||||
u32 tpc_offset, gpc_offset, reg_offset;
|
||||
|
||||
ch = gk20a_get_channel_from_file(args->channel_fd);
|
||||
|
||||
mutex_lock(&g->dbg_sessions_lock);
|
||||
|
||||
for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
|
||||
if (args->sms & (1 << sm_id)) {
|
||||
gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
|
||||
tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
|
||||
|
||||
tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
|
||||
gpc_offset = proj_gpc_stride_v() * gpc;
|
||||
reg_offset = tpc_offset + gpc_offset;
|
||||
|
||||
ops.op = REGOP(READ_32);
|
||||
ops.type = REGOP(TYPE_GR_CTX);
|
||||
ops.status = REGOP(STATUS_SUCCESS);
|
||||
ops.value_hi = 0;
|
||||
ops.and_n_mask_lo = 0;
|
||||
ops.and_n_mask_hi = 0;
|
||||
ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
|
||||
|
||||
err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
|
||||
sm_dbgr_ctrl0 = ops.value_lo;
|
||||
|
||||
if (args->enable) {
|
||||
sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
|
||||
gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
|
||||
gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
|
||||
sm_dbgr_ctrl0;
|
||||
} else
|
||||
sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() | sm_dbgr_ctrl0;
|
||||
|
||||
if (!err) {
|
||||
ops.op = REGOP(WRITE_32);
|
||||
ops.value_lo = sm_dbgr_ctrl0;
|
||||
err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
|
||||
} else
|
||||
gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gpu_ioctl_wait_for_pause(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_wait_pause_args *args)
|
||||
{
|
||||
int err = 0, gpc, tpc;
|
||||
u32 sm_count, sm_id, size;
|
||||
struct warpstate *w_state;
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
u32 tpc_offset, gpc_offset, reg_offset, global_mask;
|
||||
u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
|
||||
|
||||
sm_count = g->gr.gpc_count * g->gr.tpc_count;
|
||||
size = sm_count * sizeof(struct warpstate);
|
||||
w_state = kzalloc(size, GFP_KERNEL);
|
||||
|
||||
global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
|
||||
gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
|
||||
gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
|
||||
|
||||
mutex_lock(&g->dbg_sessions_lock);
|
||||
|
||||
for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
|
||||
|
||||
gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
|
||||
tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
|
||||
|
||||
tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
|
||||
gpc_offset = proj_gpc_stride_v() * gpc;
|
||||
reg_offset = tpc_offset + gpc_offset;
|
||||
|
||||
/* Wait until all valid warps on the sm are paused. The valid warp mask
|
||||
* must be re-read with the paused mask because new warps may become
|
||||
* valid as the sm is pausing.
|
||||
*/
|
||||
|
||||
err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
|
||||
if (err) {
|
||||
gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* 64 bit read */
|
||||
warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
|
||||
warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
|
||||
|
||||
/* 64 bit read */
|
||||
warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
|
||||
warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
|
||||
|
||||
/* 64 bit read */
|
||||
warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
|
||||
warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
|
||||
|
||||
w_state[sm_id].valid_warps = warps_valid;
|
||||
w_state[sm_id].trapped_warps = warps_trapped;
|
||||
w_state[sm_id].paused_warps = warps_paused;
|
||||
}
|
||||
|
||||
if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
|
||||
gk20a_dbg_fn("copy_to_user failed!");
|
||||
err = -EFAULT;
|
||||
}
|
||||
|
||||
end:
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
kfree(w_state);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gpu_ioctl_has_any_exception(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_tpc_exception_en_status_args *args)
|
||||
{
|
||||
int err = 0;
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
u32 sm_id, tpc_exception_en = 0;
|
||||
u32 offset, regval, tpc_offset, gpc_offset;
|
||||
|
||||
mutex_lock(&g->dbg_sessions_lock);
|
||||
|
||||
for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
|
||||
|
||||
tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
|
||||
gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
|
||||
offset = tpc_offset + gpc_offset;
|
||||
|
||||
regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
|
||||
offset);
|
||||
/* Each bit represents corresponding enablement state, bit 0 corrsponds to SM0 */
|
||||
tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
|
||||
}
|
||||
|
||||
mutex_unlock(&g->dbg_sessions_lock);
|
||||
args->tpc_exception_en_sm_mask = tpc_exception_en;
|
||||
return err;
|
||||
}
|
||||
|
||||
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct platform_device *dev = filp->private_data;
|
||||
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
||||
err = nvgpu_gpu_ioctl_l2_fb_ops(g,
|
||||
(struct nvgpu_gpu_l2_fb_args *)buf);
|
||||
break;
|
||||
case NVGPU_GPU_IOCTL_INVAL_ICACHE:
|
||||
err = gr_gk20a_elpg_protected_call(g,
|
||||
nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
|
||||
err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
|
||||
(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
|
||||
err = gr_gk20a_elpg_protected_call(g,
|
||||
nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
|
||||
err = nvgpu_gpu_ioctl_wait_for_pause(g,
|
||||
(struct nvgpu_gpu_wait_pause_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
|
||||
err = nvgpu_gpu_ioctl_has_any_exception(g,
|
||||
(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
|
||||
break;
|
||||
|
||||
default:
|
||||
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
|
||||
err = -ENOTTY;
|
||||
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -56,6 +56,8 @@
|
||||
#include "hw_sim_gk20a.h"
|
||||
#include "hw_top_gk20a.h"
|
||||
#include "hw_ltc_gk20a.h"
|
||||
#include "hw_gr_gk20a.h"
|
||||
#include "hw_fb_gk20a.h"
|
||||
#include "gk20a_scale.h"
|
||||
#include "dbg_gpu_gk20a.h"
|
||||
#include "hal.h"
|
||||
@@ -727,6 +729,21 @@ static int gk20a_detect_chip(struct gk20a *g)
|
||||
return gpu_init_hal(g);
|
||||
}
|
||||
|
||||
void gk20a_pm_restore_debug_setting(struct gk20a *g)
|
||||
{
|
||||
u32 mmu_debug_ctrl;
|
||||
|
||||
/* restore mmu debug state */
|
||||
if (g->mmu_debug_ctrl)
|
||||
mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
|
||||
else
|
||||
mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
|
||||
|
||||
mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
|
||||
mmu_debug_ctrl = set_field(mmu_debug_ctrl, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
|
||||
gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
|
||||
}
|
||||
|
||||
static int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
{
|
||||
struct platform_device *pdev = to_platform_device(dev);
|
||||
@@ -851,6 +868,9 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Restore the debug setting */
|
||||
gk20a_pm_restore_debug_setting(g);
|
||||
|
||||
gk20a_channel_resume(g);
|
||||
set_user_nice(current, nice_value);
|
||||
|
||||
|
||||
@@ -511,6 +511,7 @@ struct gk20a {
|
||||
struct device_dma_parameters dma_parms;
|
||||
|
||||
struct gk20a_cde_app cde_app;
|
||||
bool mmu_debug_ctrl;
|
||||
};
|
||||
|
||||
static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
|
||||
|
||||
@@ -1273,7 +1273,6 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
|
||||
u32 tpc_index, gpc_index;
|
||||
u32 tpc_offset, gpc_offset;
|
||||
u32 sm_id = 0, gpc_id = 0;
|
||||
u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
|
||||
u32 tpc_per_gpc;
|
||||
u32 max_ways_evict = INVALID_MAX_WAYS;
|
||||
u32 l1c_dbg_reg_val;
|
||||
@@ -1295,7 +1294,9 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
|
||||
gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
|
||||
gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
|
||||
|
||||
sm_id_to_gpc_id[sm_id] = gpc_index;
|
||||
g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
|
||||
g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
|
||||
|
||||
sm_id++;
|
||||
}
|
||||
|
||||
@@ -1306,6 +1307,8 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
|
||||
}
|
||||
}
|
||||
|
||||
gr->no_of_sm = sm_id;
|
||||
|
||||
for (tpc_index = 0, gpc_id = 0;
|
||||
tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
|
||||
tpc_index++, gpc_id += 8) {
|
||||
@@ -2997,6 +3000,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
||||
kfree(gr->pes_tpc_count[1]);
|
||||
kfree(gr->pes_tpc_mask[0]);
|
||||
kfree(gr->pes_tpc_mask[1]);
|
||||
kfree(gr->sm_to_cluster);
|
||||
kfree(gr->gpc_skip_mask);
|
||||
kfree(gr->map_tiles);
|
||||
gr->gpc_tpc_count = NULL;
|
||||
@@ -3089,6 +3093,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
||||
gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
||||
gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
||||
gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
|
||||
|
||||
gr->gpc_skip_mask =
|
||||
kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
|
||||
GFP_KERNEL);
|
||||
@@ -3159,6 +3164,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
||||
gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
|
||||
}
|
||||
|
||||
gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->tpc_count *
|
||||
sizeof(struct sm_info), GFP_KERNEL);
|
||||
gr->no_of_sm = 0;
|
||||
|
||||
gk20a_dbg_info("fbps: %d", gr->num_fbps);
|
||||
gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
|
||||
gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
|
||||
|
||||
@@ -165,6 +165,11 @@ struct zbc_query_params {
|
||||
u32 index_size; /* [out] size, [in] index */
|
||||
};
|
||||
|
||||
struct sm_info {
|
||||
u8 gpc_index;
|
||||
u8 tpc_index;
|
||||
};
|
||||
|
||||
struct gr_gk20a {
|
||||
struct gk20a *g;
|
||||
struct {
|
||||
@@ -290,6 +295,8 @@ struct gr_gk20a {
|
||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
||||
struct gr_t18x t18x;
|
||||
#endif
|
||||
u32 no_of_sm;
|
||||
struct sm_info *sm_to_cluster;
|
||||
};
|
||||
|
||||
void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -194,10 +194,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
|
||||
{
|
||||
return (r >> 16) & 0x1;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_m(void)
|
||||
{
|
||||
return 0x1 << 16;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 fb_mmu_vpr_info_r(void)
|
||||
{
|
||||
return 0x00100cd0;
|
||||
|
||||
@@ -342,6 +342,30 @@ static inline u32 gr_activity_4_r(void)
|
||||
{
|
||||
return 0x00400390;
|
||||
}
|
||||
/* GCC (GPC cache controller) debug register, GPC0 unicast instance. */
static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
{
	return 0x00501000;
}
/* GCC debug register, broadcast to all GPCs. */
static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
{
	return 0x00419000;
}
/* Mask for the GCC invalidate field (single bit at position 1). */
static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
{
	return 0x1 << 1;
}
/* SM cache control register, GPC0/TPC0 unicast instance. */
static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
{
	return 0x005046a4;
}
/* SM cache control register, broadcast to all GPCs/TPCs. */
static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
{
	return 0x00419ea4;
}
/* Mask for the SM i-cache invalidate field (single bit at position 0). */
static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
{
	return 0x1 << 0;
}
|
||||
static inline u32 gr_pri_sked_activity_r(void)
|
||||
{
|
||||
return 0x00407054;
|
||||
@@ -2962,6 +2986,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
|
||||
{
|
||||
return 0x0050450c;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
|
||||
{
|
||||
return (r >> 1) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
|
||||
{
|
||||
return 0x2;
|
||||
@@ -3010,6 +3038,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
|
||||
{
|
||||
return 0x80000000;
|
||||
@@ -3022,10 +3054,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
|
||||
{
|
||||
return 0x40000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
|
||||
{
|
||||
return (r >> 1) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
|
||||
{
|
||||
return (r >> 2) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
|
||||
{
|
||||
return 0x00504614;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
|
||||
{
|
||||
return 0x00504624;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
|
||||
{
|
||||
return 0x00504634;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
|
||||
{
|
||||
return 0x0050460c;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
|
||||
{
|
||||
return (r >> 4) & 0x1;
|
||||
|
||||
@@ -533,10 +533,15 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
|
||||
+ gpc_offset + tpc_offset,
|
||||
gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
|
||||
|
||||
g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
|
||||
g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
|
||||
|
||||
sm_id++;
|
||||
}
|
||||
}
|
||||
|
||||
gr->no_of_sm = sm_id;
|
||||
|
||||
for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
|
||||
tpc_per_gpc |= gr->gpc_tpc_count[gpc_index]
|
||||
<< (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index);
|
||||
|
||||
@@ -214,10 +214,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
|
||||
{
|
||||
return (r >> 16) & 0x1;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_m(void)
|
||||
{
|
||||
return 0x1 << 16;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 fb_mmu_vpr_info_r(void)
|
||||
{
|
||||
return 0x00100cd0;
|
||||
|
||||
@@ -330,6 +330,30 @@ static inline u32 gr_activity_4_r(void)
|
||||
{
|
||||
return 0x00400390;
|
||||
}
|
||||
static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
|
||||
{
|
||||
return 0x00501000;
|
||||
}
|
||||
static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
|
||||
{
|
||||
return 0x00419000;
|
||||
}
|
||||
static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
|
||||
{
|
||||
return 0x1 << 1;
|
||||
}
|
||||
static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
|
||||
{
|
||||
return 0x005046a4;
|
||||
}
|
||||
static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
|
||||
{
|
||||
return 0x00419ea4;
|
||||
}
|
||||
static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
|
||||
{
|
||||
return 0x1 << 0;
|
||||
}
|
||||
static inline u32 gr_pri_sked_activity_r(void)
|
||||
{
|
||||
return 0x00407054;
|
||||
@@ -2998,6 +3022,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
|
||||
{
|
||||
return 0x2;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
|
||||
{
|
||||
return (r >> 1) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
|
||||
{
|
||||
return 0x0041ac94;
|
||||
@@ -3054,10 +3082,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
|
||||
{
|
||||
return 0x40000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
|
||||
{
|
||||
return (r >> 1) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
|
||||
{
|
||||
return (r >> 2) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
|
||||
{
|
||||
return 0x00504614;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
|
||||
{
|
||||
return 0x00504624;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
|
||||
{
|
||||
return 0x00504634;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
|
||||
{
|
||||
return 0x0050460c;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0x1;
|
||||
}
|
||||
static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
|
||||
{
|
||||
return (r >> 4) & 0x1;
|
||||
|
||||
@@ -252,6 +252,36 @@ struct nvgpu_gpu_l2_fb_args {
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct nvgpu_gpu_inval_icache_args {
|
||||
int channel_fd;
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct nvgpu_gpu_mmu_debug_mode_args {
|
||||
__u32 state;
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct nvgpu_gpu_sm_debug_mode_args {
|
||||
int channel_fd;
|
||||
__u32 enable;
|
||||
__u64 sms;
|
||||
} __packed;
|
||||
|
||||
struct warpstate {
|
||||
__u64 valid_warps;
|
||||
__u64 trapped_warps;
|
||||
__u64 paused_warps;
|
||||
};
|
||||
|
||||
struct nvgpu_gpu_wait_pause_args {
|
||||
__u64 pwarpstate;
|
||||
};
|
||||
|
||||
struct nvgpu_gpu_tpc_exception_en_status_args {
|
||||
__u64 tpc_exception_en_sm_mask;
|
||||
};
|
||||
|
||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
||||
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
||||
@@ -276,9 +306,19 @@ struct nvgpu_gpu_l2_fb_args {
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
|
||||
#define NVGPU_GPU_IOCTL_FLUSH_L2 \
|
||||
_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
|
||||
#define NVGPU_GPU_IOCTL_INVAL_ICACHE \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 13, struct nvgpu_gpu_inval_icache_args)
|
||||
#define NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 14, struct nvgpu_gpu_mmu_debug_mode_args)
|
||||
#define NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 15, struct nvgpu_gpu_sm_debug_mode_args)
|
||||
#define NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 16, struct nvgpu_gpu_wait_pause_args)
|
||||
#define NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 17, struct nvgpu_gpu_tpc_exception_en_status_args)
|
||||
|
||||
#define NVGPU_GPU_IOCTL_LAST \
|
||||
_IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
|
||||
_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS)
|
||||
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user