gpu: nvgpu: rework regops execution API

Rework the regops execution API to accommodate the following updates for
the new profiler design:

- gops.regops.exec_regops() now accepts a TSG pointer instead of a
  channel pointer.
- Replace the individual boolean parameters with a single flags field.

The following new flags are added for this API:
NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE       - abort on the first op that fails
                                           validation
NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR - keep processing the remaining ops
                                           when an op fails validation
NVGPU_REG_OP_FLAG_ALL_PASSED             - set by the driver when every op
                                           passed in continue-on-error mode
NVGPU_REG_OP_FLAG_DIRECT_OPS             - set by the driver when the context
                                           is resident and the ops are applied
                                           directly

Update dependent APIs, e.g. gr_gk20a_exec_ctx_ops() and validate_reg_ops(),
to match the new signature.
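
As an illustration, a caller of the reworked API now follows roughly this
pattern (a minimal sketch assuming g, tsg, ops and num_ops are already set
up; error handling and locking elided):

    u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;

    /* tsg may be NULL for purely global (non-context) ops */
    err = g->ops.regops.exec_regops(g, tsg, ops, num_ops, &flags);
    if (err == 0 && (flags & NVGPU_REG_OP_FLAG_DIRECT_OPS) != 0U) {
            /* context was resident; ops were applied directly */
    }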

Add a new API, gk20a_is_tsg_ctx_resident(), to check context residency
from a TSG pointer.

Convert gr_gk20a_ctx_patch_smpc() to a HAL, gops.gr.ctx_patch_smpc().
Set this HAL only for gm20b, since it is not required on later chips.
Also remove the subcontext code from this function, since gm20b does not
support subcontexts.

Remove the stale comment about missing vGPU support from exec_regops_gk20a().

Bug 2510974
Jira NVGPU-5360

Change-Id: I3c25c34277b5ca88484da1e20d459118f15da102
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2389733
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:     Deepak Nibade
Date:       2020-07-29 18:25:19 +05:30
Committer:  Alex Waterman
Commit:     6daa0636d1
Parent:     a73b5d3c6f

13 changed files with 172 additions and 131 deletions

View File

@@ -81,14 +81,14 @@ static bool validate_reg_ops(struct gk20a *g,
                u32 *ctx_rd_count, u32 *ctx_wr_count,
                struct nvgpu_dbg_reg_op *ops,
                u32 op_count,
-               bool is_profiler);
+               bool valid_ctx,
+               u32 *flags);
 
 int exec_regops_gk20a(struct gk20a *g,
-               struct nvgpu_channel *ch,
+               struct nvgpu_tsg *tsg,
                struct nvgpu_dbg_reg_op *ops,
                u32 num_ops,
-               bool is_profiler,
-               bool *is_current_ctx)
+               u32 *flags)
 {
         int err = 0;
         unsigned int i;
@@ -99,20 +99,8 @@ int exec_regops_gk20a(struct gk20a *g,
         nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-        /* For vgpu, the regops routines need to be handled in the
-         * context of the server and support for that does not exist.
-         *
-         * The two users of the regops interface are the compute driver
-         * and tools. The compute driver will work without a functional
-         * regops implementation, so we return -ENOSYS. This will allow
-         * compute apps to run with vgpu. Tools will not work in this
-         * configuration and are not required to work at this time. */
-        if (g->is_virtual) {
-                return -ENOSYS;
-        }
-
         ok = validate_reg_ops(g, &ctx_rd_count, &ctx_wr_count,
-                        ops, num_ops, is_profiler);
+                        ops, num_ops, tsg != NULL, flags);
         if (!ok) {
                 nvgpu_err(g, "invalid op(s)");
                 err = -EINVAL;
@@ -211,9 +199,9 @@ int exec_regops_gk20a(struct gk20a *g,
         }
 
         if ((ctx_wr_count | ctx_rd_count) != 0U) {
-                err = gr_gk20a_exec_ctx_ops(ch, ops, num_ops,
+                err = gr_gk20a_exec_ctx_ops(tsg, ops, num_ops,
                                 ctx_wr_count, ctx_rd_count,
-                                is_current_ctx);
+                                flags);
                 if (err != 0) {
                         nvgpu_warn(g, "failed to perform ctx ops\n");
                         goto clean_up;
@@ -269,7 +257,7 @@ static int validate_reg_op_info(struct nvgpu_dbg_reg_op *op)
 static bool check_whitelists(struct gk20a *g,
                struct nvgpu_dbg_reg_op *op,
                u32 offset,
-               bool is_profiler)
+               bool valid_ctx)
 {
         bool valid = false;
@@ -283,7 +271,7 @@ static bool check_whitelists(struct gk20a *g,
                         regop_bsearch_range_cmp) != NULL);
 
         /* if debug session, search context list */
-        if ((!valid) && (!is_profiler)) {
+        if ((!valid) && (valid_ctx)) {
                 /* binary search context list */
                 valid = (g->ops.regops.get_context_whitelist_ranges != NULL) &&
                         (nvgpu_bsearch(&offset,
@@ -294,7 +282,7 @@ static bool check_whitelists(struct gk20a *g,
         }
 
         /* if debug session, search runcontrol list */
-        if ((!valid) && (!is_profiler)) {
+        if ((!valid) && (valid_ctx)) {
                 valid = (g->ops.regops.get_runcontrol_whitelist != NULL) &&
                         linear_search(offset,
                                 g->ops.regops.get_runcontrol_whitelist(),
@@ -310,7 +298,7 @@ static bool check_whitelists(struct gk20a *g,
                         regop_bsearch_range_cmp) != NULL);
 
         /* if debug session, search runcontrol list */
-        if ((!valid) && (!is_profiler)) {
+        if ((!valid) && (valid_ctx)) {
                 valid = (g->ops.regops.get_runcontrol_whitelist != NULL) &&
                         linear_search(offset,
                                 g->ops.regops.get_runcontrol_whitelist(),
@@ -324,7 +312,7 @@ static bool check_whitelists(struct gk20a *g,
 /* note: the op here has already been through validate_reg_op_info */
 static int validate_reg_op_offset(struct gk20a *g,
                struct nvgpu_dbg_reg_op *op,
-               bool is_profiler)
+               bool valid_ctx)
 {
         int err;
         u32 buf_offset_lo, buf_offset_addr, num_offsets, offset;
@@ -340,9 +328,9 @@ static int validate_reg_op_offset(struct gk20a *g,
                 return -EINVAL;
         }
 
-        valid = check_whitelists(g, op, offset, is_profiler);
+        valid = check_whitelists(g, op, offset, valid_ctx);
         if ((op->op == REGOP(READ_64) || op->op == REGOP(WRITE_64)) && valid) {
-                valid = check_whitelists(g, op, offset + 4U, is_profiler);
+                valid = check_whitelists(g, op, offset + 4U, valid_ctx);
         }
 
         if (valid && (op->type != REGOP(TYPE_GLOBAL))) {
@@ -383,19 +371,23 @@ static bool validate_reg_ops(struct gk20a *g,
                u32 *ctx_rd_count, u32 *ctx_wr_count,
                struct nvgpu_dbg_reg_op *ops,
                u32 op_count,
-               bool is_profiler)
+               bool valid_ctx,
+               u32 *flags)
 {
-        u32 i;
-        bool ok = true;
+        bool all_or_none = (*flags) & NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
         bool gr_ctx_ops = false;
+        bool op_failed = false;
+        u32 i;
 
         /* keep going until the end so every op can get
          * a separate error code if needed */
         for (i = 0; i < op_count; i++) {
                 if (validate_reg_op_info(&ops[i]) != 0) {
-                        ok = false;
-                        break;
+                        op_failed = true;
+                        if (all_or_none) {
+                                break;
+                        }
                 }
 
                 if (reg_op_is_gr_ctx(ops[i].type)) {
@@ -408,28 +400,42 @@ static bool validate_reg_ops(struct gk20a *g,
                         gr_ctx_ops = true;
                 }
 
-                /* context operations are not valid on profiler session */
-                if (gr_ctx_ops && is_profiler) {
-                        ok = false;
-                        break;
+                /* context operations need valid context */
+                if (gr_ctx_ops && !valid_ctx) {
+                        op_failed = true;
+                        if (all_or_none) {
+                                break;
+                        }
                 }
 
                 /* if "allow_all" flag enabled, dont validate offset */
                 if (!g->allow_all) {
-                        if (validate_reg_op_offset(g, &ops[i],
-                                        is_profiler) != 0) {
-                                ok = false;
-                                break;
+                        if (validate_reg_op_offset(g, &ops[i], valid_ctx) != 0) {
+                                op_failed = true;
+                                if (all_or_none) {
+                                        break;
+                                }
                         }
                 }
         }
 
-        if (ok) {
-                nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
-                        *ctx_wr_count, *ctx_rd_count);
+        nvgpu_log(g, gpu_dbg_gpu_dbg, "ctx_wrs:%d ctx_rds:%d",
+                *ctx_wr_count, *ctx_rd_count);
+
+        if (all_or_none) {
+                if (op_failed) {
+                        return false;
+                } else {
+                        return true;
+                }
         }
 
-        return ok;
+        /* Continue on error */
+        if (!op_failed) {
+                *flags |= NVGPU_REG_OP_FLAG_ALL_PASSED;
+        }
+
+        return true;
 }
 
 /* exported for tools like cyclestats, etc */

View File

@@ -35,11 +35,10 @@
 #include "common/vgpu/ivc/comm_vgpu.h"
 
 int vgpu_exec_regops(struct gk20a *g,
-                struct nvgpu_channel *ch,
+                struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ops,
                 u32 num_ops,
-                bool is_profiler,
-                bool *is_current_ctx)
+                u32 *flags)
 {
         struct tegra_vgpu_cmd_msg msg;
         struct tegra_vgpu_reg_ops_params *p = &msg.params.reg_ops;
@@ -68,17 +67,15 @@ int vgpu_exec_regops(struct gk20a *g,
         msg.cmd = TEGRA_VGPU_CMD_REG_OPS;
         msg.handle = vgpu_get_handle(g);
-        p->handle = ch ? ch->virt_ctx : 0;
+        p->tsg_id = tsg ? tsg->tsgid : U32_MAX;
         p->num_ops = num_ops;
-        p->is_profiler = is_profiler;
+        p->flags = *flags;
 
         err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
         err = err ? err : msg.ret;
         if (err == 0) {
                 nvgpu_memcpy((u8 *)ops, (u8 *)oob, ops_size);
-                if (is_current_ctx != NULL) {
-                        *is_current_ctx = p->is_current_ctx != 0u;
-                }
         }
+        *flags = p->flags;
 
 fail:
         vgpu_ivc_oob_put_ptr(handle);

View File

@@ -30,11 +30,10 @@ struct gk20a;
 struct nvgpu_channel;
 
 int vgpu_exec_regops(struct gk20a *g,
-                struct nvgpu_channel *ch,
+                struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ops,
                 u32 num_ops,
-                bool is_profiler,
-                bool *is_current_ctx);
+                u32 *flags);
 int vgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s,
                 bool disable_powergate);

View File

@@ -625,8 +625,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void)
  * which makes it impossible to know externally whether a ctx
  * write will actually occur. so later we should put a lazy,
  * map-and-hold system in the patch write state */
-static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
-               struct nvgpu_channel *ch,
+int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
                u32 addr, u32 data,
                struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -663,15 +662,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
                 nvgpu_gr_ctx_patch_write(g, gr_ctx,
                                 addr, data, true);
 
-                if (ch->subctx != NULL) {
-                        nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
-                                        false);
-                        nvgpu_gr_subctx_set_patch_ctx(g,
-                                        ch->subctx, gr_ctx);
-                } else {
-                        nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
-                                        true);
-                }
+                nvgpu_gr_ctx_set_patch_ctx(g, gr_ctx,
+                                true);
 
                 /* we're not caching these on cpu side,
                    but later watch for it */
@@ -1303,14 +1295,10 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
         return -EINVAL;
 }
 
-bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
+static struct nvgpu_channel *gk20a_get_resident_ctx(struct gk20a *g, u32 *tsgid)
 {
         u32 curr_gr_ctx;
-        u32 curr_gr_tsgid;
-        struct gk20a *g = ch->g;
         struct nvgpu_channel *curr_ch;
-        bool ret = false;
-        struct nvgpu_tsg *tsg;
 
         curr_gr_ctx = g->ops.gr.falcon.get_current_ctx(g);
@@ -1320,20 +1308,27 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
          * valid context is currently resident.
          */
         if (gr_fecs_current_ctx_valid_v(curr_gr_ctx) == 0U) {
-                return false;
+                return NULL;
         }
 
-        curr_ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_gr_ctx,
-                        &curr_gr_tsgid);
+        curr_ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_gr_ctx, tsgid);
 
         nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
-                "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d"
-                " ch->chid=%d",
-                (curr_ch != NULL) ? curr_ch->chid : U32_MAX,
-                curr_gr_tsgid,
-                ch->tsgid,
-                ch->chid);
+                "curr_gr_chid=%d curr_tsgid=%d",
+                (curr_ch != NULL) ? curr_ch->chid : U32_MAX, *tsgid);
+
+        return curr_ch;
+}
+
+bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
+{
+        u32 curr_gr_tsgid;
+        struct gk20a *g = ch->g;
+        struct nvgpu_channel *curr_ch;
+        bool ret = false;
+        struct nvgpu_tsg *tsg;
+
+        curr_ch = gk20a_get_resident_ctx(g, &curr_gr_tsgid);
         if (curr_ch == NULL) {
                 return false;
         }
@@ -1351,13 +1346,33 @@ bool gk20a_is_channel_ctx_resident(struct nvgpu_channel *ch)
         return ret;
 }
 
-static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
+static bool gk20a_is_tsg_ctx_resident(struct nvgpu_tsg *tsg)
+{
+        u32 curr_gr_tsgid;
+        struct gk20a *g = tsg->g;
+        struct nvgpu_channel *curr_ch;
+        bool ret = false;
+
+        curr_ch = gk20a_get_resident_ctx(g, &curr_gr_tsgid);
+        if (curr_ch == NULL) {
+                return false;
+        }
+
+        if ((tsg->tsgid == curr_gr_tsgid) &&
+            (tsg->tsgid == curr_ch->tsgid)) {
+                ret = true;
+        }
+
+        nvgpu_channel_put(curr_ch);
+        return ret;
+}
+
+static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
                 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-                bool ch_is_curr_ctx)
+                bool ctx_resident)
 {
-        struct gk20a *g = ch->g;
-        struct nvgpu_tsg *tsg;
+        struct gk20a *g = tsg->g;
         struct nvgpu_gr_ctx *gr_ctx;
         bool gr_ctx_ready = false;
         bool pm_ctx_ready = false;
@@ -1376,14 +1391,9 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
         nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
                 num_ctx_wr_ops, num_ctx_rd_ops);
 
-        tsg = nvgpu_tsg_from_ch(ch);
-        if (tsg == NULL) {
-                return -EINVAL;
-        }
-
         gr_ctx = tsg->gr_ctx;
 
-        if (ch_is_curr_ctx) {
+        if (ctx_resident) {
                 for (pass = 0; pass < 2; pass++) {
                         ctx_op_nr = 0;
                         for (i = 0; i < num_ops; ++i) {
@@ -1549,10 +1559,11 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
                                         offsets[j] + 4U, v);
                         }
 
-                        if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) {
+                        if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx) &&
+                            g->ops.gr.ctx_patch_smpc != NULL) {
                                 /* check to see if we need to add a special WAR
                                    for some of the SMPC perf regs */
-                                gr_gk20a_ctx_patch_smpc(g, ch,
+                                g->ops.gr.ctx_patch_smpc(g,
                                         offset_addrs[j],
                                         v, gr_ctx);
                         }
@@ -1591,14 +1602,14 @@ static int gr_exec_ctx_ops(struct nvgpu_channel *ch,
         return err;
 }
 
-int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
+int gr_gk20a_exec_ctx_ops(struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
                 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-                bool *is_curr_ctx)
+                u32 *flags)
 {
-        struct gk20a *g = ch->g;
+        struct gk20a *g = tsg->g;
         int err, tmp_err;
-        bool ch_is_curr_ctx;
+        bool ctx_resident;
 
         /* disable channel switching.
          * at that point the hardware state can be inspected to
@@ -1611,15 +1622,16 @@ int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
                 return err;
         }
 
-        ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-        if (is_curr_ctx != NULL) {
-                *is_curr_ctx = ch_is_curr_ctx;
+        ctx_resident = gk20a_is_tsg_ctx_resident(tsg);
+        if (ctx_resident) {
+                *flags |= NVGPU_REG_OP_FLAG_DIRECT_OPS;
         }
-        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
-                ch_is_curr_ctx);
 
-        err = gr_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
-                num_ctx_rd_ops, ch_is_curr_ctx);
+        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+                ctx_resident);
+
+        err = gr_exec_ctx_ops(tsg, ctx_ops, num_ops, num_ctx_wr_ops,
+                num_ctx_rd_ops, ctx_resident);
 
         tmp_err = nvgpu_gr_enable_ctxsw(g);
         if (tmp_err != 0) {
@@ -1865,6 +1877,12 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
         u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
         u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
         u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
+        struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+        u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
+
+        if (tsg == NULL) {
+                return -EINVAL;
+        }
 
         ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops));
         if (ops == NULL) {
@@ -1910,7 +1928,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
                 i++;
         }
 
-        err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
+        err = gr_gk20a_exec_ctx_ops(tsg, ops, i, i, 0, &flags);
         if (err != 0) {
                 nvgpu_err(g, "Failed to access register");
         }

View File

@@ -34,15 +34,16 @@ struct nvgpu_tsg;
 struct nvgpu_warpstate;
 struct dbg_session_gk20a;
 struct nvgpu_dbg_reg_op;
+struct nvgpu_gr_ctx;
 enum ctxsw_addr_type;
 
 /* sm */
 bool gk20a_gr_sm_debugger_attached(struct gk20a *g);
-int gr_gk20a_exec_ctx_ops(struct nvgpu_channel *ch,
+int gr_gk20a_exec_ctx_ops(struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ctx_ops, u32 num_ops,
                 u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
-                bool *is_curr_ctx);
+                u32 *flags);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
                 u32 addr, u32 max_offsets,
                 u32 *offsets, u32 *offset_addrs,
@@ -57,6 +58,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
                 struct nvgpu_tsg *tsg,
                 u64 gpu_va, u32 mode);
+int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
+                u32 addr, u32 data,
+                struct nvgpu_gr_ctx *gr_ctx);
 void gk20a_gr_resume_single_sm(struct gk20a *g,
                 u32 gpc, u32 tpc, u32 sm);
 void gk20a_gr_resume_all_sms(struct gk20a *g);

View File

@@ -555,12 +555,13 @@ int gm20b_gr_set_mmu_debug_mode(struct gk20a *g,
         };
         int err;
         struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+        u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
         if (tsg == NULL) {
                 return enable ? -EINVAL : 0;
         }
 
-        err = gr_gk20a_exec_ctx_ops(ch, &ctx_ops, 1, 1, 0, NULL);
+        err = gr_gk20a_exec_ctx_ops(tsg, &ctx_ops, 1, 1, 0, &flags);
         if (err != 0) {
                 nvgpu_err(g, "update MMU debug mode failed");
         }

View File

@@ -867,14 +867,13 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
         unsigned int i = 0, sm_id;
         u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
         int err;
-#ifdef CONFIG_NVGPU_SM_DIVERSITY
         struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
+        u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
         if (tsg == NULL) {
                 nvgpu_err(g, "gv11b_gr_set_sm_debug_mode failed=>tsg NULL");
                 return -EINVAL;
         }
-#endif
 
         ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops));
         if (ops == NULL) {
@@ -945,7 +944,7 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
                 i++;
         }
 
-        err = gr_gk20a_exec_ctx_ops(ch, ops, i, i, 0, NULL);
+        err = gr_gk20a_exec_ctx_ops(tsg, ops, i, i, 0, &flags);
         if (err != 0) {
                 nvgpu_err(g, "Failed to access register");
         }

View File

@@ -212,6 +212,7 @@ static const struct gpu_ops gm20b_ops = {
                 .bpt_reg_info = gr_gm20b_bpt_reg_info,
                 .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
                 .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
+                .ctx_patch_smpc = gr_gk20a_ctx_patch_smpc,
                 .set_mmu_debug_mode = gm20b_gr_set_mmu_debug_mode,
                 .clear_sm_error_state = gm20b_gr_clear_sm_error_state,
                 .suspend_contexts = gr_gk20a_suspend_contexts,

View File

@@ -422,11 +422,10 @@ struct gpu_ops {
 #ifdef CONFIG_NVGPU_DEBUGGER
         struct {
                 int (*exec_regops)(struct gk20a *g,
-                                struct nvgpu_channel *ch,
+                                struct nvgpu_tsg *tsg,
                                 struct nvgpu_dbg_reg_op *ops,
                                 u32 num_ops,
-                                bool is_profiler,
-                                bool *is_current_ctx);
+                                u32 *flags);
                 const struct regop_offset_range* (
                                 *get_global_whitelist_ranges)(void);
                 u64 (*get_global_whitelist_ranges_count)(void);

View File

@@ -1109,6 +1109,9 @@ struct gops_gr {
                         struct nvgpu_tsg *tsg,
                         u64 gpu_va,
                         u32 mode);
+        int (*ctx_patch_smpc)(struct gk20a *g,
+                        u32 addr, u32 data,
+                        struct nvgpu_gr_ctx *gr_ctx);
         void (*init_hwpm_pmm_register)(struct gk20a *g);
         void (*get_num_hwpm_perfmon)(struct gk20a *g, u32 *num_sys_perfmon,
                         u32 *num_fbp_perfmon,

View File

@@ -26,6 +26,11 @@
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 
+#include <nvgpu/types.h>
+
+struct gk20a;
+struct nvgpu_tsg;
+
 /*
  * Register operations
  * All operations are targeted towards first channel
@@ -57,6 +62,11 @@
 #define NVGPU_DBG_REG_OP_STATUS_UNSUPPORTED_OP 0x00000008U
 #define NVGPU_DBG_REG_OP_STATUS_INVALID_MASK 0x00000010U
 
+#define NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE       BIT32(1U)
+#define NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR BIT32(2U)
+#define NVGPU_REG_OP_FLAG_ALL_PASSED             BIT32(3U)
+#define NVGPU_REG_OP_FLAG_DIRECT_OPS             BIT32(4U)
+
 struct nvgpu_dbg_reg_op {
         u8 op;
         u8 type;
@@ -77,11 +87,10 @@ struct regop_offset_range {
 };
 
 int exec_regops_gk20a(struct gk20a *g,
-                struct nvgpu_channel *ch,
+                struct nvgpu_tsg *tsg,
                 struct nvgpu_dbg_reg_op *ops,
                 u32 num_ops,
-                bool is_profiler,
-                bool *is_current_ctx);
+                u32 *flags);
 
 /* turn seriously unwieldy names -> something shorter */
 #define REGOP(x) NVGPU_DBG_REG_OP_##x

View File

@@ -328,10 +328,9 @@ struct tegra_vgpu_reg_op {
 };
 
 struct tegra_vgpu_reg_ops_params {
-        u64 handle;
         u64 num_ops;
-        u32 is_profiler;
-        u8 is_current_ctx;
+        u32 tsg_id;
+        u32 flags;
 };
 
 struct tegra_vgpu_channel_priority_params {

View File

@@ -778,12 +778,10 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 {
         int err = 0, powergate_err = 0;
         bool is_pg_disabled = false;
         struct gk20a *g = dbg_s->g;
         struct nvgpu_channel *ch;
-        bool is_current_ctx = false;
+        struct nvgpu_tsg *tsg = NULL;
+        u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
 
         nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
@@ -813,6 +811,14 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                 return -EINVAL;
         }
 
+        if (ch != NULL) {
+                tsg = nvgpu_tsg_from_ch(ch);
+                if (tsg == NULL) {
+                        nvgpu_err(g, "channel not bound to TSG");
+                        return -EINVAL;
+                }
+        }
+
         /* since exec_reg_ops sends methods to the ucode, it must take the
          * global gpu lock to protect against mixing methods from debug sessions
          * on other channels */
@@ -869,16 +875,16 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
                 if (err)
                         break;
 
-                err = g->ops.regops.exec_regops(g, ch,
-                                g->dbg_regops_tmp_buf, num_ops,
-                                dbg_s->is_profiler, &is_current_ctx);
+                err = g->ops.regops.exec_regops(g, tsg,
+                                g->dbg_regops_tmp_buf, num_ops, &flags);
                 if (err) {
                         break;
                 }
 
                 if (ops_offset == 0) {
-                        args->gr_ctx_resident = is_current_ctx;
+                        args->gr_ctx_resident =
+                                flags & NVGPU_REG_OP_FLAG_DIRECT_OPS;
                 }
 
                 err = nvgpu_get_regops_data_linux(g->dbg_regops_tmp_buf,