diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c
index e4d74da2f..feb4b6067 100644
--- a/drivers/gpu/nvgpu/common/regops/regops.c
+++ b/drivers/gpu/nvgpu/common/regops/regops.c
@@ -122,6 +122,15 @@ int exec_regops_gk20a(struct gk20a *g,
 			continue;
 		}
 
+		/*
+		 * Move to the next op if the current op is invalid.
+		 * Execution reaches here only when CONTINUE_ON_ERROR
+		 * mode is requested.
+		 */
+		if (ops[i].status != REGOP(STATUS_SUCCESS)) {
+			continue;
+		}
+
 		switch (ops[i].op) {
 		case REGOP(READ_32):
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index 965bd6f81..ed4111116 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -1400,6 +1400,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
 		if (ctx_op_nr >= num_ctx_ops[pass]) {
 			break;
 		}
+
+		/*
+		 * Move to the next op if the current op is invalid.
+		 * Execution reaches here only when CONTINUE_ON_ERROR
+		 * mode is requested.
+		 */
+		if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) {
+			continue;
+		}
+
 		/* only do ctx ops and only on the right pass */
 		if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
 		    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 04503267d..9d4e8d560 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -579,7 +579,7 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s)
  * Convert common regops op values of the form of NVGPU_DBG_REG_OP_*
  * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
  */
-static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
+u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
 {
 	switch (regops_op) {
 	case REGOP(READ_32):
@@ -603,7 +603,7 @@ static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
  * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
  * into common regops op values of the form of NVGPU_DBG_REG_OP_*
  */
-static u32 nvgpu_get_regops_op_values_common(u32 regops_op)
+u32 nvgpu_get_regops_op_values_common(u32 regops_op)
 {
 	switch (regops_op) {
 	case REGOP_LINUX(READ_32):
@@ -679,7 +679,7 @@ static u32 nvgpu_get_regops_type_values_common(u32 regops_type)
  * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_*
  * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
  */
-static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
+u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
 {
 	switch (regops_status) {
 	case REGOP(STATUS_SUCCESS):
@@ -703,7 +703,7 @@ static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
  * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
  * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_*
  */
-static u32 nvgpu_get_regops_status_values_common(u32 regops_status)
+u32 nvgpu_get_regops_status_values_common(u32 regops_status)
 {
 	switch (regops_status) {
 	case REGOP_LINUX(STATUS_SUCCESS):
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
index 2e188cc04..bfa088eb4 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
@@ -35,4 +35,9 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
 /* used by profiler driver interface */
 int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
 
+u32 nvgpu_get_regops_op_values_common(u32 regops_op);
+u32 nvgpu_get_regops_status_values_common(u32 regops_status);
+u32 nvgpu_get_regops_op_values_linux(u32 regops_op);
+u32 nvgpu_get_regops_status_values_linux(u32 regops_status);
+
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index e2f3841f4..d4fd21723 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -25,16 +25,42 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
 #include "os_linux.h"
 #include "ioctl_prof.h"
+#include "ioctl_dbg.h"
 #include "ioctl_tsg.h"
 
+#define NVGPU_PROF_UMD_COPY_WINDOW_SIZE		SZ_4K
+
 struct nvgpu_profiler_object_priv {
 	struct nvgpu_profiler_object *prof;
 	struct gk20a *g;
+
+	/*
+	 * Staging buffer to hold regops copied from userspace.
+	 * Regops are stored in struct nvgpu_profiler_reg_op format. This
+	 * struct is added for the new profiler design and is a trimmed-down
+	 * version of the legacy regop struct nvgpu_dbg_reg_op.
+	 *
+	 * Struct nvgpu_profiler_reg_op is an OS-specific struct and cannot
+	 * be used in common nvgpu code.
+	 */
+	struct nvgpu_profiler_reg_op *regops_umd_copy_buf;
+
+	/*
+	 * Staging buffer to execute regops in common code.
+	 * Regops are stored in struct nvgpu_dbg_reg_op which is defined
+	 * in common code.
+	 *
+	 * Regops in struct nvgpu_profiler_reg_op should first be converted
+	 * to this format, and this buffer should be passed for regops
+	 * execution.
+	 */
+	struct nvgpu_dbg_reg_op *regops_staging_buf;
 };
 
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
@@ -42,6 +68,7 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 {
 	struct nvgpu_profiler_object_priv *prof_priv;
 	struct nvgpu_profiler_object *prof;
+	u32 num_regops;
 	int err;
 
 	nvgpu_log(g, gpu_dbg_prof, "Request to open profiler session with scope %u",
@@ -54,19 +81,40 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 
 	err = nvgpu_profiler_alloc(g, &prof, scope);
 	if (err != 0) {
-		nvgpu_kfree(g, prof_priv);
-		return -ENOMEM;
+		goto free_priv;
 	}
 
 	prof_priv->g = g;
 	prof_priv->prof = prof;
 	filp->private_data = prof_priv;
 
+	prof_priv->regops_umd_copy_buf = nvgpu_kzalloc(g,
+			NVGPU_PROF_UMD_COPY_WINDOW_SIZE);
+	if (prof_priv->regops_umd_copy_buf == NULL) {
+		goto free_prof;
+	}
+
+	num_regops = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
+			sizeof(prof_priv->regops_umd_copy_buf[0]);
+	prof_priv->regops_staging_buf = nvgpu_kzalloc(g,
+			num_regops * sizeof(prof_priv->regops_staging_buf[0]));
+	if (prof_priv->regops_staging_buf == NULL) {
+		goto free_umd_buf;
+	}
+
 	nvgpu_log(g, gpu_dbg_prof, "Profiler session with scope %u created successfully with profiler handle %u",
 		scope, prof->prof_handle);
 
 	return 0;
+
+free_umd_buf:
+	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
+free_prof:
+	nvgpu_profiler_free(prof);
+free_priv:
+	nvgpu_kfree(g, prof_priv);
+	return err;
 }
 
 int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp)
@@ -132,6 +180,10 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
 		prof->scope, prof->prof_handle);
 
 	nvgpu_profiler_free(prof);
+
+	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
+	nvgpu_kfree(g, prof_priv->regops_staging_buf);
+
 	nvgpu_kfree(g, prof_priv);
 	nvgpu_put(g);
 
@@ -283,6 +335,168 @@ static int nvgpu_prof_ioctl_unbind_pm_resources(struct nvgpu_profiler_object *pr
 	return nvgpu_profiler_unbind_pm_resources(prof);
 }
 
+static void nvgpu_prof_get_regops_staging_data(struct nvgpu_profiler_object *prof,
+		struct nvgpu_profiler_reg_op *in,
+		struct nvgpu_dbg_reg_op *out, u32 num_ops)
+{
+	u32 i;
+	u8 reg_op_type = 0U;
+
+	switch (prof->scope) {
+	case NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE:
+		if (prof->tsg != NULL) {
+			reg_op_type = NVGPU_DBG_REG_OP_TYPE_GR_CTX;
+		} else {
+			reg_op_type = NVGPU_DBG_REG_OP_TYPE_GLOBAL;
+		}
+		break;
+	case NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT:
+		reg_op_type = NVGPU_DBG_REG_OP_TYPE_GR_CTX;
+		break;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		out[i].op = nvgpu_get_regops_op_values_common(in[i].op);
+		out[i].type = reg_op_type;
+		out[i].status = nvgpu_get_regops_status_values_common(in[i].status);
+		out[i].quad = 0U;
+		out[i].group_mask = 0U;
+		out[i].sub_group_mask = 0U;
+		out[i].offset = in[i].offset;
+		out[i].value_lo = u64_lo32(in[i].value);
+		out[i].value_hi = u64_hi32(in[i].value);
+		out[i].and_n_mask_lo = u64_lo32(in[i].and_n_mask);
+		out[i].and_n_mask_hi = u64_hi32(in[i].and_n_mask);
+	}
+}
+
+static void nvgpu_prof_get_regops_linux_data(struct nvgpu_dbg_reg_op *in,
+		struct nvgpu_profiler_reg_op *out, u32 num_ops)
+{
+	u32 i;
+
+	for (i = 0; i < num_ops; i++) {
+		out[i].op = nvgpu_get_regops_op_values_linux(in[i].op);
+		out[i].status = nvgpu_get_regops_status_values_linux(in[i].status);
+		out[i].offset = in[i].offset;
+		out[i].value = hi32_lo32_to_u64(in[i].value_hi, in[i].value_lo);
+		out[i].and_n_mask = hi32_lo32_to_u64(in[i].and_n_mask_hi, in[i].and_n_mask_lo);
+	}
+}
+
+static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv,
+		struct nvgpu_profiler_exec_reg_ops_args *args)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct nvgpu_tsg *tsg = prof->tsg;
+	u32 num_regops_in_copy_buf = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
+			sizeof(priv->regops_umd_copy_buf[0]);
+	u32 ops_offset = 0;
+	u32 flags = 0U;
+	bool all_passed = true;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"REG_OPS for handle %u: count=%u mode=%u flags=0x%x",
+		prof->prof_handle, args->count, args->mode, args->flags);
+
+	if (args->count == 0) {
+		return -EINVAL;
+	}
+
+	if (args->count > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) {
+		nvgpu_err(g, "regops limit exceeded");
+		return -EINVAL;
+	}
+
+	if (!prof->bound) {
+		nvgpu_err(g, "PM resources are not bound to profiler");
+		return -EINVAL;
+	}
+
+	if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+		flags |= NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR;
+	} else {
+		flags |= NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
+	}
+
+	while (ops_offset < args->count) {
+		const u32 num_ops =
+			min(args->count - ops_offset, num_regops_in_copy_buf);
+		const u64 fragment_size =
+			num_ops * sizeof(priv->regops_umd_copy_buf[0]);
+		void __user *const user_fragment =
+			(void __user *)(uintptr_t)
+			(args->ops +
+			 ops_offset * sizeof(priv->regops_umd_copy_buf[0]));
+
+		nvgpu_log(g, gpu_dbg_prof, "Regops fragment: start_op=%u ops=%u",
+			ops_offset, num_ops);
+
+		if (copy_from_user(priv->regops_umd_copy_buf,
+				user_fragment, fragment_size)) {
+			nvgpu_err(g, "copy_from_user failed!");
+			err = -EFAULT;
+			break;
+		}
+
+		nvgpu_prof_get_regops_staging_data(prof,
+			priv->regops_umd_copy_buf,
+			priv->regops_staging_buf, num_ops);
+
+		if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+			flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
+		}
+
+		err = g->ops.regops.exec_regops(g, tsg,
+			priv->regops_staging_buf, num_ops,
+			&flags);
+		if (err) {
+			nvgpu_err(g, "regop execution failed");
+			break;
+		}
+
+		if (ops_offset == 0) {
+			if (flags & NVGPU_REG_OP_FLAG_DIRECT_OPS) {
+				args->flags |=
+					NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_DIRECT_OPS;
+			}
+		}
+
+		if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+			if ((flags & NVGPU_REG_OP_FLAG_ALL_PASSED) == 0) {
+				all_passed = false;
+			}
+		}
+
+		nvgpu_prof_get_regops_linux_data(
+			priv->regops_staging_buf,
+			priv->regops_umd_copy_buf, num_ops);
+
+		if (copy_to_user(user_fragment,
+				priv->regops_umd_copy_buf,
+				fragment_size)) {
+			nvgpu_err(g, "copy_to_user failed!");
+			err = -EFAULT;
+			break;
+		}
+
+		ops_offset += num_ops;
+	}
+
+	if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR
+			&& all_passed && (err == 0)) {
+		args->flags |= NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"REG_OPS for handle %u complete: count=%u mode=%u flags=0x%x err=%d",
+		prof->prof_handle, args->count, args->mode, args->flags, err);
+
+	return err;
+}
+
 long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 	unsigned long arg)
 {
@@ -341,6 +555,11 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 		err = nvgpu_prof_ioctl_unbind_pm_resources(prof);
 		break;
 
+	case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS:
+		err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv,
+			(struct nvgpu_profiler_exec_reg_ops_args *)buf);
+		break;
+
 	default:
 		nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 508090e9f..33850bb53 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1577,14 +1577,31 @@ struct nvgpu_profiler_pma_stream_update_get_put_args {
 	__u32 reserved[3];
 };
 
-enum {
-	NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_ALL_OR_NONE,
-	NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR,
-};
+/*
+ * MODE_ALL_OR_NONE
+ * Reg_ops execution bails out if any of the reg_ops is not valid
+ * or if there is any other error, such as a failure to access the context image.
+ * Subsequent reg_ops will not be executed and nvgpu_profiler_reg_op.status
+ * will not be populated for them.
+ * The IOCTL returns an error in all of these cases.
+ */
+#define NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_ALL_OR_NONE	0U
+/*
+ * MODE_CONTINUE_ON_ERROR
+ * This mode allows reg_ops execution to continue even if some of the
+ * reg_ops are not valid. Invalid reg_ops will be skipped and valid
+ * ones will be executed.
+ * The IOCTL returns an error only if there is some other severe failure,
+ * such as a failure to access the context image.
+ * If any reg_op is invalid, or did not pass, this is
+ * reported via the NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED flag.
+ * The IOCTL returns success in such cases.
+ */
+#define NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR	1U
 
 struct nvgpu_profiler_reg_op {
-	__u8 op;
-	__u8 status;
+	__u8 op;	/* Operation in the form NVGPU_DBG_GPU_REG_OP_READ/WRITE_* */
+	__u8 status;	/* Status in the form NVGPU_DBG_GPU_REG_OP_STATUS_* */
 	__u32 offset;
 	__u64 value;
 	__u64 and_n_mask;
@@ -1593,7 +1610,10 @@ struct nvgpu_profiler_reg_op {
 
 struct nvgpu_profiler_exec_reg_ops_args {
 	__u32 mode;	/* in: operation mode NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_* */
-	__u32 count;	/* in: number of reg_ops operations */
+	__u32 count;	/* in: number of reg_ops operations,
+			 * upper limit nvgpu_gpu_characteristics.reg_ops_limit
+			 */
+	__u64 ops;	/* in/out: pointer to actual operations nvgpu_profiler_reg_op */
 	/* out: if all reg_ops passed, valid only for MODE_CONTINUE_ON_ERROR */
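
For illustration, a minimal user-space sketch (not part of the patch) of how a UMD might drive the NVGPU_PROFILER_IOCTL_EXEC_REG_OPS ioctl added above, using CONTINUE_ON_ERROR mode and the ALL_PASSED output flag. The <linux/nvgpu.h> include path, the NVGPU_DBG_GPU_REG_OP_READ_32 constant, and the read_pm_regs() helper name are assumptions based on the existing dbg/prof uapi; the profiler fd is assumed to be already opened and bound to PM resources, and error handling is trimmed.

/*
 * Illustrative sketch only: issue a batch of 32-bit register reads
 * through NVGPU_PROFILER_IOCTL_EXEC_REG_OPS on an already-opened,
 * already-bound profiler object fd. Header path and the READ_32 op
 * constant are assumed, not taken from this patch.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

#define SKETCH_MAX_OPS 16U	/* arbitrary limit for this example */

static int read_pm_regs(int prof_fd, const uint32_t *offsets,
			uint64_t *values, uint32_t count)
{
	struct nvgpu_profiler_reg_op ops[SKETCH_MAX_OPS] = {0};
	struct nvgpu_profiler_exec_reg_ops_args args = {0};
	uint32_t i;

	if (count > SKETCH_MAX_OPS)
		return -1;

	/* Build one read op per requested offset. */
	for (i = 0; i < count; i++) {
		ops[i].op = NVGPU_DBG_GPU_REG_OP_READ_32;
		ops[i].offset = offsets[i];
	}

	/* Skip invalid ops instead of failing the whole batch. */
	args.mode = NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR;
	args.count = count;
	args.ops = (uint64_t)(uintptr_t)ops;

	if (ioctl(prof_fd, NVGPU_PROFILER_IOCTL_EXEC_REG_OPS, &args) != 0)
		return -1;

	/*
	 * In CONTINUE_ON_ERROR mode the ioctl succeeds even if some ops
	 * were skipped; ALL_PASSED reports whether every op went through.
	 */
	if ((args.flags & NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED) == 0)
		return -1;

	for (i = 0; i < count; i++)
		values[i] = ops[i].value;

	return 0;
}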