diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c
index 02d23cb8f..e4d74da2f 100644
--- a/drivers/gpu/nvgpu/common/regops/regops.c
+++ b/drivers/gpu/nvgpu/common/regops/regops.c
@@ -81,14 +81,14 @@ static bool validate_reg_ops(struct gk20a *g,
 			     u32 *ctx_rd_count, u32 *ctx_wr_count,
 			     struct nvgpu_dbg_reg_op *ops,
 			     u32 op_count,
-			     bool is_profiler);
+			     bool valid_ctx,
+			     u32 *flags);
 
 int exec_regops_gk20a(struct gk20a *g,
-		      struct nvgpu_channel *ch,
+		      struct nvgpu_tsg *tsg,
 		      struct nvgpu_dbg_reg_op *ops,
 		      u32 num_ops,
-		      bool is_profiler,
-		      bool *is_current_ctx)
+		      u32 *flags)
 {
 	int err = 0;
 	unsigned int i;
@@ -99,20 +99,8 @@ int exec_regops_gk20a(struct gk20a *g,
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	/* For vgpu, the regops routines need to be handled in the
-	 * context of the server and support for that does not exist.
-	 *
-	 * The two users of the regops interface are the compute driver
-	 * and tools. The compute driver will work without a functional
-	 * regops implementation, so we return -ENOSYS. This will allow
-	 * compute apps to run with vgpu. Tools will not work in this
-	 * configuration and are not required to work at this time. */
-	if (g->is_virtual) {
-		return -ENOSYS;
-	}
-
 	ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count,
-			      ops, num_ops, is_profiler);
+			      ops, num_ops, tsg != NULL, flags);
 	if (!ok) {
 		nvgpu_err(g, "invalid op(s)");
 		err = -EINVAL;
@@ -211,9 +199,9 @@ int exec_regops_gk20a(struct gk20a *g,
 	}
 
 	if ((ctx_wr_count | ctx_rd_count) != 0U) {
-		err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
+		err = gr_gk20a_exec_ctx_ops(tsg, ops, num_ops,
 					    ctx_wr_count, ctx_rd_count,
-					    is_current_ctx);
+					    flags);
 		if (err != 0) {
 			nvgpu_warn(g, "failed to perform ctx ops\n");
 			goto clean_up;
@@ -269,7 +257,7 @@ static int validate_reg_op_info(struct nvgpu_dbg_reg_op *op)
 static bool check_whitelists(struct gk20a *g,
 			     struct nvgpu_dbg_reg_op *op,
 			     u32 offset,
-			     bool is_profiler)
+			     bool valid_ctx)
 {
 	bool valid = false;
 
@@ -283,7 +271,7 @@ static bool check_whitelists(struct gk20a *g,
 			regop_bsearch_range_cmp) != NULL);
 
 		/* if debug session, search context list */
-		if ((!valid) && (!is_profiler)) {
+		if ((!valid) && (valid_ctx)) {
 			/* binary search context list */
 			valid = (g->ops.regops.get_context_whitelist_ranges != NULL) &&
 				(nvgpu_bsearch(&offset,
@@ -294,7 +282,7 @@ static bool check_whitelists(struct gk20a *g,
 		}
 
 		/* if debug session, search runcontrol list */
-		if ((!valid) && (!is_profiler)) {
+		if ((!valid) && (valid_ctx)) {
 			valid = (g->ops.regops.get_runcontrol_whitelist != NULL) &&
 				linear_search(offset,
 					g->ops.regops.get_runcontrol_whitelist(),
@@ -310,7 +298,7 @@ static bool check_whitelists(struct gk20a *g,
 			regop_bsearch_range_cmp) != NULL);
 
 		/* if debug session, search runcontrol list */
-		if ((!valid) && (!is_profiler)) {
+		if ((!valid) && (valid_ctx)) {
 			valid = (g->ops.regops.get_runcontrol_whitelist != NULL) &&
 				linear_search(offset,
 					g->ops.regops.get_runcontrol_whitelist(),
@@ -324,7 +312,7 @@ static bool check_whitelists(struct gk20a *g,
 /* note: the op here has already been through validate_reg_op_info */
 static int validate_reg_op_offset(struct gk20a *g,
 				  struct nvgpu_dbg_reg_op *op,
-				  bool is_profiler)
+				  bool valid_ctx)
 {
 	int err;
 	u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
@@ -340,9 +328,9 @@ static int validate_reg_op_offset(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	valid = check_whitelists(g, op, offset, is_profiler);
+	valid = check_whitelists(g, op, offset, valid_ctx);
 	if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
-		valid = check_whitelists(g, op, offset + 4U, is_profiler);
+		valid = check_whitelists(g, op, offset + 4U, valid_ctx);
 	}
 
 	if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
@@ -383,19 +371,23 @@ static bool validate_reg_ops(struct gk20a *g,
 			     u32 *ctx_rd_count, u32 *ctx_wr_count,
 			     struct nvgpu_dbg_reg_op *ops,
 			     u32 op_count,
-			     bool is_profiler)
+			     bool valid_ctx,
+			     u32 *flags)
 {
-	u32 i;
-	bool ok = true;
+	bool all_or_none = (*flags) & NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 	bool gr_ctx_ops = false;
+	bool op_failed = false;
+	u32 i;
 
 	/* keep going until the end so every op can get
 	 * a separate error code if needed */
 	for (i = 0; i < op_count; i++) {
 		if (validate_reg_op_info(&ops[i]) != 0) {
-			ok = false;
-			break;
+			op_failed = true;
+			if (all_or_none) {
+				break;
+			}
 		}
 
 		if (reg_op_is_gr_ctx(ops[i].type)) {
@@ -408,28 +400,42 @@ static bool validate_reg_ops(struct gk20a *g,
 			gr_ctx_ops = true;
 		}
 
-		/* context operations are not valid on profiler session */
-		if (gr_ctx_ops && is_profiler) {
-			ok = false;
-			break;
+		/* context operations need valid context */
+		if (gr_ctx_ops && !valid_ctx) {
+			op_failed = true;
+			if (all_or_none) {
+				break;
+			}
 		}
 
 		/* if "allow_all" flag enabled, dont validate offset */
 		if (!g->allow_all) {
-			if (validate_reg_op_offset(g, &ops[i],
-						   is_profiler) != 0) {
-				ok = false;
-				break;
+			if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
+				op_failed = true;
+				if (all_or_none) {
+					break;
+				}
 			}
 		}
 	}
 
-	if (ok) {
-		nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
-			  *ctx_wr_count, *ctx_rd_count);
+	nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
+		  *ctx_wr_count, *ctx_rd_count);
+
+	if (all_or_none) {
+		if (op_failed) {
+			return false;
+		} else {
+			return true;
+		}
 	}
 
-	return ok;
+	/* Continue on error */
+	if (!op_failed) {
+		*flags |= NVGPU_REG_OP_FLAG_ALL_PASSED;
+	}
+
+	return true;
 }
 
 /* exported for tools like cyclestats, etc */
diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
index b8b851ba9..2f3d9f754 100644
--- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.c
@@ -35,11 +35,10 @@
 #include "common/vgpu/ivc/comm_vgpu.h"
 
 int vgpu_exec_regops(struct gk20a *g,
-			struct nvgpu_channel *ch,
-			struct nvgpu_dbg_reg_op *ops,
-			u32 num_ops,
-			bool is_profiler,
-			bool *is_current_ctx)
+			struct nvgpu_tsg *tsg,
+			struct nvgpu_dbg_reg_op *ops,
+			u32 num_ops,
+			u32 *flags)
 {
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_reg_ops_params *p = &msg.params.reg_ops;
@@ -68,17 +67,15 @@ int vgpu_exec_regops(struct gk20a *g,
 
 	msg.cmd = TEGRA_VGPU_CMD_REG_OPS;
 	msg.handle = vgpu_get_handle(g);
-	p->handle = ch ? ch->virt_ctx : 0;
+	p->tsg_id = tsg ? tsg->tsgid : U32_MAX;
 	p->num_ops = num_ops;
-	p->is_profiler = is_profiler;
+	p->flags = *flags;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	err = err ? err : msg.ret;
 	if (err == 0) {
 		nvgpu_memcpy((u8 *)ops, (u8 *)oob, ops_size);
-		if (is_current_ctx != NULL) {
-			*is_current_ctx = p->is_current_ctx != 0u;
-		}
 	}
+	*flags = p->flags;
 
 fail:
 	vgpu_ivc_oob_put_ptr(handle);
diff --git a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
index 439ff0788..ebe274c8e 100644
--- a/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
+++ b/drivers/gpu/nvgpu/common/vgpu/debugger_vgpu.h
@@ -30,11 +30,10 @@ struct gk20a;
 struct nvgpu_channel;
 
 int vgpu_exec_regops(struct gk20a *g,
-			struct nvgpu_channel *ch,
-			struct nvgpu_dbg_reg_op *ops,
-			u32 num_ops,
-			bool is_profiler,
-			bool *is_current_ctx);
+			struct nvgpu_tsg *tsg,
+			struct nvgpu_dbg_reg_op *ops,
+			u32 num_ops,
+			u32 *flags);
 int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
 			bool disable_powergate);
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index d46c2ce49..965bd6f81 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -625,8 +625,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void)
  * which makes it impossible to know externally whether a ctx
  * write will actually occur. so later we should put a lazy,
  * map-and-hold system in the patch write state */
-static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
-			    struct nvgpu_channel *ch,
+int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			    u32 addr, u32 data,
 			    struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -663,15 +662,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 
 		nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true);
 
-		if (ch->subctx != NULL) {
-			nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
-				false);
-			nvgpu_gr_subctx_set_patch_ctx(g,
-				ch->subctx, gr_ctx);
-		} else {
-			nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
+		nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
 				true);
-		}
 
 		/* we're not caching these on cpu side, but later watch for it */
 
@@ -1303,14 +1295,10 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 	return -EINVAL;
 }
 
-bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
+static struct nvgpu_channel *gk20a_get_resident_ctx(struct gk20a *g, u32 *tsgid)
 {
 	u32 curr_gr_ctx;
-	u32 curr_gr_tsgid;
-	struct gk20a *g = ch->g;
 	struct nvgpu_channel *curr_ch;
-	bool ret = false;
-	struct nvgpu_tsg *tsg;
 
 	curr_gr_ctx = g->ops.gr.falcon.get_current_ctx(g);
 
@@ -1320,20 +1308,27 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
 	 * valid context is currently resident.
 	 */
 	if (gr_fecs_current_ctx_valid_v(curr_gr_ctx) == 0U) {
-		return false;
+		return NULL;
 	}
 
-	curr_ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_gr_ctx,
-			&curr_gr_tsgid);
+	curr_ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_gr_ctx, tsgid);
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
-		  "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
-		  " ch->chid=%d",
-		  (curr_ch != NULL) ? curr_ch->chid : U32_MAX,
-		  curr_gr_tsgid,
-		  ch->tsgid,
-		  ch->chid);
+		  "curr_gr_chid=%d curr_tsgid=%d",
+		  (curr_ch != NULL) ? curr_ch->chid : U32_MAX, *tsgid);
+	return curr_ch;
+}
+
+bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
+{
+	u32 curr_gr_tsgid;
+	struct gk20a *g = ch->g;
+	struct nvgpu_channel *curr_ch;
+	bool ret = false;
+	struct nvgpu_tsg *tsg;
+
+	curr_ch = gk20a_get_resident_ctx(g, &curr_gr_tsgid);
 
 	if (curr_ch == NULL) {
 		return false;
 	}
@@ -1351,13 +1346,33 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
 	return ret;
 }
 
-static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
+static bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
+{
+	u32 curr_gr_tsgid;
+	struct gk20a *g = tsg->g;
+	struct nvgpu_channel *curr_ch;
+	bool ret = false;
+
+	curr_ch = gk20a_get_resident_ctx(g, &curr_gr_tsgid);
+	if (curr_ch == NULL) {
+		return false;
+	}
+
+	if ((tsg->tsgid == curr_gr_tsgid) &&
+	    (tsg->tsgid == curr_ch->tsgid)) {
+		ret = true;
+	}
+
+	nvgpu_channel_put(curr_ch);
+	return ret;
+}
+
+static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
			   struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
			   u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-			   bool ch_is_curr_ctx)
+			   bool ctx_resident)
 {
-	struct gk20a *g = ch->g;
-	struct nvgpu_tsg *tsg;
+	struct gk20a *g = tsg->g;
 	struct nvgpu_gr_ctx *gr_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
@@ -1376,14 +1391,9 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		  num_ctx_wr_ops, num_ctx_rd_ops);
 
-	tsg = nvgpu_tsg_from_ch(ch);
-	if (tsg == NULL) {
-		return -EINVAL;
-	}
-
 	gr_ctx = tsg->gr_ctx;
 
-	if (ch_is_curr_ctx) {
+	if (ctx_resident) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
 			for (i = 0; i < num_ops; ++i) {
@@ -1549,10 +1559,11 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
 							offsets[j] + 4U, v);
 				}
 
-				if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) {
+				if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx) &&
+				    g->ops.gr.ctx_patch_smpc != NULL) {
 					/* check to see if we need to add a special
 					   WAR for some of the SMPC perf regs */
-					gr_gk20a_ctx_patch_smpc(g, ch,
+					g->ops.gr.ctx_patch_smpc(g,
 							offset_addrs[j],
 							v, gr_ctx);
 				}
@@ -1591,14 +1602,14 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
 	return err;
 }
 
-int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
+int gr_gk20a_exec_ctx_ops(struct nvgpu_tsg *tsg,
			  struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-			  bool *is_curr_ctx)
+			  u32 *flags)
 {
-	struct gk20a *g = ch->g;
+	struct gk20a *g = tsg->g;
 	int err, tmp_err;
-	bool ch_is_curr_ctx;
+	bool ctx_resident;
 
 	/* disable channel switching.
 	 * at that point the hardware state can be inspected to
@@ -1611,15 +1622,16 @@ int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
 		return err;
 	}
 
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-	if (is_curr_ctx != NULL) {
-		*is_curr_ctx = ch_is_curr_ctx;
+	ctx_resident = gk20a_is_tsg_ctx_resident(tsg);
+	if (ctx_resident) {
+		*flags |= NVGPU_REG_OP_FLAG_DIRECT_OPS;
 	}
 
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
-		  ch_is_curr_ctx);
-	err = gr_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
-			      num_ctx_rd_ops, ch_is_curr_ctx);
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ctx_resident);
+
+	err = gr_exec_ctx_ops(tsg, ctx_ops, num_ops, num_ctx_wr_ops,
+			      num_ctx_rd_ops, ctx_resident);
 
 	tmp_err = nvgpu_gr_enable_ctxsw(g);
 	if (tmp_err != 0) {
@@ -1865,6 +1877,12 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
+	struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+	u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
+
+	if (tsg == NULL) {
+		return -EINVAL;
+	}
 
 	ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops));
 	if (ops == NULL) {
@@ -1910,7 +1928,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 		i++;
 	}
 
-	err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
+	err = gr_gk20a_exec_ctx_ops(tsg, ops, i, i, 0, &flags);
 	if (err != 0) {
 		nvgpu_err(g, "Failed to access register");
 	}
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
index 607a6e030..b1fe5ff53 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
@@ -34,15 +34,16 @@ struct nvgpu_tsg;
 struct nvgpu_warpstate;
 struct dbg_session_gk20a;
 struct nvgpu_dbg_reg_op;
+struct nvgpu_gr_ctx;
 enum ctxsw_addr_type;
 
 /* sm */
 bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
-int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
+int gr_gk20a_exec_ctx_ops(struct nvgpu_tsg *tsg,
			  struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-			  bool *is_curr_ctx);
+			  u32 *flags);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
				    u32 addr, u32 max_offsets,
				    u32 *offsets, u32 *offset_addrs,
@@ -57,6 +58,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
				    struct nvgpu_tsg *tsg,
				    u64 gpu_va, u32 mode);
+int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
+			    u32 addr, u32 data,
+			    struct nvgpu_gr_ctx *gr_ctx);
 void gk20a_gr_resume_single_sm(struct gk20a *g,
			       u32 gpc, u32 tpc, u32 sm);
 void gk20a_gr_resume_all_sms(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c
index 7e36267aa..39228be31 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c
@@ -555,12 +555,13 @@ int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
 	};
 	int err;
 	struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+	u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
 	if (tsg == NULL) {
 		return enable ? -EINVAL : 0;
 	}
 
-	err = gr_gk20a_exec_ctx_ops(ch, &ctx_ops, 1, 1, 0, NULL);
+	err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags);
 	if (err != 0) {
 		nvgpu_err(g, "update MMU debug mode failed");
 	}
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c
index bf826382e..db99fcc00 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c
@@ -867,14 +867,13 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 	unsigned int i = 0, sm_id;
 	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
 	int err;
-#ifdef CONFIG_NVGPU_SM_DIVERSITY
 	struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+	u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
 	if (tsg == NULL) {
 		nvgpu_err(g, "gv11b_gr_set_sm_debug_mode failed=>tsg NULL");
 		return -EINVAL;
 	}
-#endif
 
 	ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops));
 	if (ops == NULL) {
@@ -945,7 +944,7 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 		i++;
 	}
 
-	err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
+	err = gr_gk20a_exec_ctx_ops(tsg, ops, i, i, 0, &flags);
 	if (err != 0) {
 		nvgpu_err(g, "Failed to access register");
 	}
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index 7d692bf2a..36f2ce437 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -212,6 +212,7 @@ static const struct gpu_ops gm20b_ops = {
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
 		.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
 		.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+		.ctx_patch_smpc = gr_gk20a_ctx_patch_smpc,
 		.set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
 		.clear_sm_error_state = gm20b_gr_clear_sm_error_state,
 		.suspend_contexts = gr_gk20a_suspend_contexts,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 2b1ecb7e7..b3117d5c5 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -422,11 +422,10 @@ struct gpu_ops {
 #ifdef CONFIG_NVGPU_DEBUGGER
 	struct {
 		int (*exec_regops)(struct gk20a *g,
-				   struct nvgpu_channel *ch,
-				   struct nvgpu_dbg_reg_op *ops,
-				   u32 num_ops,
-				   bool is_profiler,
-				   bool *is_current_ctx);
+				   struct nvgpu_tsg *tsg,
+				   struct nvgpu_dbg_reg_op *ops,
+				   u32 num_ops,
+				   u32 *flags);
 		const struct regop_offset_range* (
				*get_global_whitelist_ranges)(void);
 		u64 (*get_global_whitelist_ranges_count)(void);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h
index 3395cd5d9..6d652225b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops_gr.h
@@ -1109,6 +1109,9 @@ struct gops_gr {
			struct nvgpu_tsg *tsg,
			u64 gpu_va, u32 mode);
+	int (*ctx_patch_smpc)(struct gk20a *g,
+			u32 addr, u32 data,
+			struct nvgpu_gr_ctx *gr_ctx);
 	void (*init_hwpm_pmm_register)(struct gk20a *g);
 	void (*get_num_hwpm_perfmon)(struct gk20a *g,
			u32 *num_sys_perfmon, u32 *num_fbp_perfmon,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/regops.h b/drivers/gpu/nvgpu/include/nvgpu/regops.h
index dad812e89..d60162fed 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/regops.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/regops.h
@@ -26,6 +26,11 @@
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 
+#include
+
+struct gk20a;
+struct nvgpu_tsg;
+
 /*
  * Register operations
  * All operations are targeted towards first channel
@@ -57,6 +62,11 @@
 #define NVGPU_DBG_REG_OP_STATUS_UNSUPPORTED_OP 0x00000008U
 #define NVGPU_DBG_REG_OP_STATUS_INVALID_MASK 0x00000010U
 
+#define NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE BIT32(1U)
+#define NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR BIT32(2U)
+#define NVGPU_REG_OP_FLAG_ALL_PASSED BIT32(3U)
+#define NVGPU_REG_OP_FLAG_DIRECT_OPS BIT32(4U)
+
 struct nvgpu_dbg_reg_op {
 	u8 op;
 	u8 type;
@@ -77,11 +87,10 @@ struct regop_offset_range {
 };
 
 int exec_regops_gk20a(struct gk20a *g,
-		      struct nvgpu_channel *ch,
+		      struct nvgpu_tsg *tsg,
		      struct nvgpu_dbg_reg_op *ops,
		      u32 num_ops,
-		      bool is_profiler,
-		      bool *is_current_ctx);
+		      u32 *flags);
 
 /* turn seriously unwieldy names -> something shorter */
 #define REGOP(x) NVGPU_DBG_REG_OP_##x
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
index 8de8e99ae..14fe9cc25 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
@@ -328,10 +328,9 @@ struct tegra_vgpu_reg_op {
 };
 
 struct tegra_vgpu_reg_ops_params {
-	u64 handle;
 	u64 num_ops;
-	u32 is_profiler;
-	u8 is_current_ctx;
+	u32 tsg_id;
+	u32 flags;
 };
 
 struct tegra_vgpu_channel_priority_params {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index a32267f60..04503267d 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -778,12 +778,10 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 {
 	int err = 0, powergate_err = 0;
 	bool is_pg_disabled = false;
-
 	struct gk20a *g = dbg_s->g;
 	struct nvgpu_channel *ch;
-
-	bool is_current_ctx = false;
-
+	struct nvgpu_tsg *tsg = NULL;
+	u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
 	nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops,
		     g->dbg_regops_tmp_buf_ops);
@@ -813,6 +811,14 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
+	if (ch != NULL) {
+		tsg = nvgpu_tsg_from_ch(ch);
+		if (tsg == NULL) {
+			nvgpu_err(g, "channel not bound to TSG");
+			return -EINVAL;
+		}
+	}
+
 	/* since exec_reg_ops sends methods to the ucode, it must take the
	 * global gpu lock to protect against mixing methods from debug sessions
	 * on other channels */
@@ -869,16 +875,16 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		if (err)
 			break;
 
-		err = g->ops.regops.exec_regops(g, ch,
-			g->dbg_regops_tmp_buf, num_ops,
-			dbg_s->is_profiler, &is_current_ctx);
+		err = g->ops.regops.exec_regops(g, tsg,
+			g->dbg_regops_tmp_buf, num_ops, &flags);
 		if (err) {
 			break;
 		}
 
 		if (ops_offset == 0) {
-			args->gr_ctx_resident = is_current_ctx;
+			args->gr_ctx_resident =
+				flags & NVGPU_REG_OP_FLAG_DIRECT_OPS;
 		}
 
 		err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf,
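Note on the new flags contract (illustration only, not part of the patch): the old is_profiler/is_current_ctx pair is replaced by a single u32 passed through exec_regops. The caller sets a mode bit going in (NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE, as ioctl_dbg.c and the SM/MMU debug-mode helpers do, or NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR), and status bits come back in the same word: NVGPU_REG_OP_FLAG_ALL_PASSED when every op validated in continue-on-error mode, and NVGPU_REG_OP_FLAG_DIRECT_OPS when the TSG context was resident and the ops were applied directly. The sketch below is a self-contained, user-space mock of that handshake; request_reg_ops() is a hypothetical stand-in for g->ops.regops.exec_regops(g, tsg, ops, num_ops, &flags), and BIT32() is assumed to expand to a 32-bit shift as in the nvgpu headers.

/* Standalone mock of the NVGPU_REG_OP_FLAG_* handshake introduced above.
 * Flag names mirror include/nvgpu/regops.h; request_reg_ops() is a
 * hypothetical stand-in for the driver call and simply pretends the ops
 * ran against the resident context and all passed. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT32(i)					(1U << (i))
#define NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE		BIT32(1U)
#define NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR	BIT32(2U)
#define NVGPU_REG_OP_FLAG_ALL_PASSED			BIT32(3U)
#define NVGPU_REG_OP_FLAG_DIRECT_OPS			BIT32(4U)

static int request_reg_ops(uint32_t *flags)
{
	bool continue_on_error =
		(*flags & NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR) != 0U;

	/* In continue-on-error mode the callee reports whether every op
	 * validated; in all-or-none mode a failure is returned instead. */
	if (continue_on_error) {
		*flags |= NVGPU_REG_OP_FLAG_ALL_PASSED;
	}
	/* Set when the ops were applied to the currently resident context. */
	*flags |= NVGPU_REG_OP_FLAG_DIRECT_OPS;
	return 0;
}

int main(void)
{
	/* Caller picks the validation mode before the call. */
	uint32_t flags = NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR;

	if (request_reg_ops(&flags) != 0) {
		return 1;
	}

	/* On return the same word carries the results. */
	printf("ctx resident (direct ops): %d\n",
	       (flags & NVGPU_REG_OP_FLAG_DIRECT_OPS) != 0U);
	printf("all ops passed:            %d\n",
	       (flags & NVGPU_REG_OP_FLAG_ALL_PASSED) != 0U);
	return 0;
}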