From 3f0ea98b7326a1aa785ac81dfeafea0c6f451ff6 Mon Sep 17 00:00:00 2001
From: Lili Sang
Date: Mon, 16 Nov 2020 13:30:20 -0800
Subject: [PATCH] gpu: nvgpu: Add get_gr_context support for Linux.

Implement the feature of retrieving gr context contents for all chips.
Two IOCTLs, NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE and
_GET_GR_CONTEXT, are added.

Bug 3102903

Change-Id: If11006f4e294f190785a2c3159ca491b9f3b5187
Signed-off-by: Lili Sang
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2449183
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
Reviewed-by: svc-mobile-cert
Reviewed-by: Chris Johnson
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c    |   2 +
 .../gpu/nvgpu/common/vgpu/init/init_vgpu.c    |   1 +
 drivers/gpu/nvgpu/include/nvgpu/enabled.h     |   2 +
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       |   2 +
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c        | 226 +++++++++++++++++-
 include/uapi/linux/nvgpu.h                    |  26 ++-
 6 files changed, 257 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index a59f496f8..6bf21720c 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -872,6 +872,8 @@ int nvgpu_init_gpu_characteristics(struct gk20a *g)
 		nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false);
 	}
 
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
index 5c6f008c7..2e81ebb20 100644
--- a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
@@ -129,6 +129,7 @@ int vgpu_init_gpu_characteristics(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SM_TTU, priv->constants.support_sm_ttu != 0U);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, false);
 
 	/* per-device identifier */
 	g->per_device_identifier = priv->constants.per_device_identifier;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 2d630b1b0..0f130be7c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -199,6 +199,8 @@ struct gk20a;
 		"Profiler V2 context object support"), \
 	DEFINE_FLAG(NVGPU_SUPPORT_SMPC_GLOBAL_MODE, \
 		"SMPC in global mode support"), \
+	DEFINE_FLAG(NVGPU_SUPPORT_GET_GR_CONTEXT, \
+		"Get gr context support"), \
 	DEFINE_FLAG(NVGPU_PMU_NEXT_CORE_ENABLED, "PMU NEXT CORE enabled"), \
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 62136f63e..3ec2c2fd2 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -289,6 +289,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SUPPORT_PROFILER_V2_CONTEXT},
 	{NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE,
 		NVGPU_SUPPORT_SMPC_GLOBAL_MODE},
+	{NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT,
+		NVGPU_SUPPORT_GET_GR_CONTEXT},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index e63c831b8..e66f09138 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -56,6 +57,8 @@
 #include "ioctl.h"
 #include "dmabuf_vidmem.h"
 
+#include "common/gr/ctx_priv.h"
+
 struct dbg_session_gk20a_linux {
 	struct device *dev;
 	struct dbg_session_gk20a dbg_s;
@@ -154,6 +157,9 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
 static int gk20a_dbg_gpu_do_dev_open(struct gk20a *g,
 		struct file *filp, bool is_profiler);
 
+static int nvgpu_dbg_get_context_buffer(struct gk20a *g, struct nvgpu_mem *ctx_mem,
+		void __user *ctx_buf, u32 ctx_buf_size);
+
 unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
 {
 	unsigned int mask = 0;
@@ -1744,6 +1750,215 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
 		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
 }
 
+/*
+ * Report, through args->size, the size in bytes of the gr context buffer of
+ * the TSG that the session's bound channel belongs to.
+ *
+ * Returns 0 on success, or -EINVAL when the feature flag is off, no channel
+ * or TSG is bound, the context memory is invalid, or the size exceeds UINT_MAX.
+ */
+static int nvgpu_dbg_gpu_ioctl_get_gr_context_size(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_get_gr_context_size_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	struct nvgpu_channel *ch;
+	struct nvgpu_tsg *tsg;
+	struct nvgpu_mem *ctx_mem;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT)) {
+		nvgpu_err(g, "get_gr_context is not supported on current config");
+		return -EINVAL;
+	}
+
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch == NULL) {
+		nvgpu_err(g, "no bound channel");
+		return -EINVAL;
+	}
+
+	tsg = nvgpu_tsg_from_ch(ch);
+	if (tsg == NULL) {
+		nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
+		return -EINVAL;
+	}
+
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
+		nvgpu_err(g, "invalid context mem");
+		return -EINVAL;
+	}
+
+	if (ctx_mem->size > (u64)UINT_MAX) {
+		nvgpu_err(ch->g, "ctx size is larger than expected");
+		return -EINVAL;
+	}
+
+	args->size = (u32)ctx_mem->size;
+
+	return 0;
+}
+
+/*
+ * Copy the gr context buffer of the session's bound channel's TSG into the
+ * user buffer at args->buffer; args->size must equal the context size.
+ *
+ * The TSG is disabled, the channel preempted and L2 flushed before the copy,
+ * and the TSG is re-enabled on every path once the disable has succeeded.
+ *
+ * Returns 0 on success. Validation failures and a failed TSG disable return
+ * -EINVAL; otherwise the error of the first failing step is returned.
+ */
+static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_get_gr_context_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	struct nvgpu_channel *ch;
+	struct nvgpu_tsg *tsg;
+	struct nvgpu_mem *ctx_mem;
+	void __user *user_buffer = (void __user *)(uintptr_t)args->buffer;
+	u32 size;
+	int err = 0, enable_err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT)) {
+		nvgpu_err(g, "get_gr_context is not supported on current config");
+		return -EINVAL;
+	}
+
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch == NULL) {
+		nvgpu_err(g, "no bound channel");
+		return -EINVAL;
+	}
+
+	tsg = nvgpu_tsg_from_ch(ch);
+	if (tsg == NULL) {
+		nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
+		return -EINVAL;
+	}
+
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
+		nvgpu_err(g, "invalid context mem");
+		return -EINVAL;
+	}
+
+	if (ctx_mem->size > (u64)UINT_MAX) {
+		nvgpu_err(ch->g, "ctx size is larger than expected");
+		return -EINVAL;
+	}
+
+	/* Check if the input buffer size equals the gr context size */
+	size = (u32)ctx_mem->size;
+	if (args->size != size) {
+		nvgpu_err(g, "size mismatch: %u != %u", args->size, size);
+		return -EINVAL;
+	}
+
+	if (nvgpu_channel_disable_tsg(g, ch) != 0) {
+		nvgpu_err(g, "failed to disable channel/TSG");
+		return -EINVAL;
+	}
+
+	err = nvgpu_preempt_channel(g, ch);
+	if (err != 0) {
+		nvgpu_err(g, "failed to preempt channel/TSG");
+		goto done;
+	}
+
+	/* Channel gr_ctx buffer is gpu cacheable.
+	   Flush and invalidate before cpu update. */
+	err = g->ops.mm.cache.l2_flush(g, true);
+	if (err != 0) {
+		nvgpu_err(g, "l2_flush failed");
+		goto done;
+	}
+
+	err = nvgpu_dbg_get_context_buffer(g, ctx_mem, user_buffer, size);
+
+done:
+	enable_err = nvgpu_channel_enable_tsg(g, ch);
+	if (enable_err != 0) {
+		nvgpu_err(g, "failed to re-enable channel/TSG");
+		return (err != 0) ? err : enable_err;
+	}
+
+	return err;
+}
+
+/*
+ * Copy ctx_buf_size bytes of ctx_mem to the user buffer ctx_buf.
+ *
+ * Sysmem contexts are copied directly from ctx_mem->cpu_va. With
+ * CONFIG_NVGPU_DGPU, any other aperture is read with nvgpu_mem_rd_n() into a
+ * bounce buffer and copied out at most SZ_4K bytes at a time.
+ *
+ * Returns 0 on success, -EINVAL when a sysmem context has no CPU mapping,
+ * -ENOMEM when the bounce buffer cannot be allocated, or -EFAULT when the
+ * user buffer cannot be written.
+ */
+static int nvgpu_dbg_get_context_buffer(struct gk20a *g, struct nvgpu_mem *ctx_mem,
+		void __user *ctx_buf, u32 ctx_buf_size)
+{
+	int err = 0;
+#ifdef CONFIG_NVGPU_DGPU
+	void *buffer = NULL;
+	u32 size, access_size;
+	u32 access_limit_size = SZ_4K;
+	u32 offset = 0;
+#endif
+
+	if (ctx_mem->aperture == APERTURE_SYSMEM) {
+		if (ctx_mem->cpu_va == NULL) {
+			nvgpu_err(g, "CPU pointer is NULL. Note that this feature is currently "
+				"not supported on virtual GPU.");
+			err = -EINVAL;
+		} else if (copy_to_user(ctx_buf, ctx_mem->cpu_va,
+				ctx_buf_size) != 0) {
+			/* copy_to_user() returns the number of bytes left uncopied */
+			err = -EFAULT;
+		}
+	}
+#ifdef CONFIG_NVGPU_DGPU
+	else {
+		/* We already checked nvgpu_mem_is_valid, so ctx_mem->aperture must be
+		   APERTURE_VIDMEM if we reach here */
+
+		buffer = nvgpu_big_zalloc(g, access_limit_size);
+		if (buffer == NULL) {
+			err = -ENOMEM;
+			goto done;
+		}
+
+		size = ctx_buf_size;
+		while (size > 0) {
+			/* Max access size of access_limit_size in one loop */
+			access_size = min(access_limit_size, size);
+
+			nvgpu_mem_rd_n(g, ctx_mem, offset, buffer, access_size);
+
+			if (copy_to_user(ctx_buf + offset, buffer,
+					access_size) != 0) {
+				err = -EFAULT;
+				goto done;
+			}
+
+			size -= access_size;
+			offset += access_size;
+		}
+done:
+		if (buffer != NULL) {
+			nvgpu_big_free(g, buffer);
+		}
+	}
+#endif
+
+	return err;
+}
+
 static int gk20a_perfbuf_release_locked(struct gk20a *g,
 	struct dbg_session_gk20a *dbg_s, u64 offset)
 {
@@ -2121,6 +2336,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_timeout_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE:
+		err = nvgpu_dbg_gpu_ioctl_get_gr_context_size(dbg_s,
+			(struct nvgpu_dbg_gpu_get_gr_context_size_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT:
+		err = nvgpu_dbg_gpu_ioctl_get_gr_context(dbg_s,
+			(struct nvgpu_dbg_gpu_get_gr_context_args *)buf);
+		break;
+
 	case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
 		err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
 			(struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
@@ -2184,7 +2409,6 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
 		break;
 
-
 	default:
 		nvgpu_err(g,
 			"unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0a7ef2ab5..ea24b4d26 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -197,6 +197,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_PROFILER_V2_CONTEXT	(1ULL << 47)
 /* Profiling SMPC in global mode is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
+/* Retrieving contents of graphics context is supported */
+#define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT	(1ULL << 49)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF	(1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1505,8 +1507,30 @@ struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args {
 	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 26, \
 	struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args)
 
+/* Get gr context size */
+struct nvgpu_dbg_gpu_get_gr_context_size_args {
+	__u32 size;
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE \
+	_IOR(NVGPU_DBG_GPU_IOCTL_MAGIC, 27, \
+	struct nvgpu_dbg_gpu_get_gr_context_size_args)
+
+/* Get gr context */
+struct nvgpu_dbg_gpu_get_gr_context_args {
+	__u64 buffer;	/* in/out: the output buffer containing contents of the gr context.
+			   buffer address is given by the user */
+	__u32 size;	/* in: size of the context buffer */
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT \
+	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 28, \
+	struct nvgpu_dbg_gpu_get_gr_context_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE		\
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)