From 3f0ea98b7326a1aa785ac81dfeafea0c6f451ff6 Mon Sep 17 00:00:00 2001
From: Lili Sang <lilis@nvidia.com>
Date: Mon, 16 Nov 2020 13:30:20 -0800
Subject: [PATCH] gpu: nvgpu: Add get_gr_context support for Linux.

Implement retrieval of the gr context contents on all chips; the feature is
reported as unsupported on virtual GPU. Add two IOCTLs,
NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE and NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT.

Bug 3102903

Change-Id: If11006f4e294f190785a2c3159ca491b9f3b5187
Signed-off-by: Lili Sang <lilis@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2449183
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Chris Johnson <cwj@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c    |   2 +
 .../gpu/nvgpu/common/vgpu/init/init_vgpu.c    |   1 +
 drivers/gpu/nvgpu/include/nvgpu/enabled.h     |   2 +
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       |   2 +
 drivers/gpu/nvgpu/os/linux/ioctl_dbg.c        | 194 +++++++++++++++++-
 include/uapi/linux/nvgpu.h                    |  26 ++-
 6 files changed, 225 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index a59f496f8..6bf21720c 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -872,6 +872,8 @@ int nvgpu_init_gpu_characteristics(struct gk20a *g)
 		nvgpu_set_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP, false);
 	}
 
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
index 5c6f008c7..2e81ebb20 100644
--- a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c
@@ -129,6 +129,7 @@ int vgpu_init_gpu_characteristics(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SPARSE_ALLOCS, false);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, false);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SM_TTU, priv->constants.support_sm_ttu != 0U);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT, false);
 
 	/* per-device identifier */
 	g->per_device_identifier = priv->constants.per_device_identifier;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 2d630b1b0..0f130be7c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -199,6 +199,8 @@ struct gk20a;
 		"Profiler V2 context object support"),			\
 	DEFINE_FLAG(NVGPU_SUPPORT_SMPC_GLOBAL_MODE,			\
 		"SMPC in global mode support"),				\
+	DEFINE_FLAG(NVGPU_SUPPORT_GET_GR_CONTEXT,			\
+		"Get gr context support"),				\
 	DEFINE_FLAG(NVGPU_PMU_NEXT_CORE_ENABLED, "PMU NEXT CORE enabled"), \
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 62136f63e..3ec2c2fd2 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -289,6 +289,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SUPPORT_PROFILER_V2_CONTEXT},
 	{NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE,
 		NVGPU_SUPPORT_SMPC_GLOBAL_MODE},
+	{NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT,
+		NVGPU_SUPPORT_GET_GR_CONTEXT},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index e63c831b8..e66f09138 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -43,6 +43,7 @@
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/power_features/pg.h>
 #include <nvgpu/nvgpu_init.h>
+#include <nvgpu/preempt.h>
 
 #include <nvgpu/linux/vm.h>
 
@@ -56,6 +57,8 @@
 #include "ioctl.h"
 #include "dmabuf_vidmem.h"
 
+#include "common/gr/ctx_priv.h"
+
 struct dbg_session_gk20a_linux {
 	struct device	*dev;
 	struct dbg_session_gk20a dbg_s;
@@ -154,6 +157,9 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
 static int gk20a_dbg_gpu_do_dev_open(struct gk20a *g,
 		struct file *filp, bool is_profiler);
 
+static int nvgpu_dbg_get_context_buffer(struct gk20a *g, struct nvgpu_mem *ctx_mem,
+		void __user *ctx_buf, u32 ctx_buf_size);
+
 unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait)
 {
 	unsigned int mask = 0;
@@ -1744,6 +1750,183 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
 		args->enable = NVGPU_DBG_GPU_IOCTL_TIMEOUT_DISABLE;
 }
 
+/* Report, in args->size, the byte size of the gr context buffer of the TSG
+   that the session's bound channel belongs to. Every failure is -EINVAL. */
+static int nvgpu_dbg_gpu_ioctl_get_gr_context_size(struct dbg_session_gk20a *dbg_s,
+			 struct nvgpu_dbg_gpu_get_gr_context_size_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	struct nvgpu_channel *bound_ch;
+	struct nvgpu_tsg *owner_tsg;
+	struct nvgpu_mem *gr_mem;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT)) {
+		nvgpu_err(g, "get_gr_context is not supported on current config");
+		return -EINVAL;
+	}
+
+	bound_ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (bound_ch == NULL) {
+		nvgpu_err(g, "no bound channel");
+		return -EINVAL;
+	}
+
+	owner_tsg = nvgpu_tsg_from_ch(bound_ch);
+	if (owner_tsg == NULL) {
+		nvgpu_err(bound_ch->g, "chid: %d is not bound to tsg", bound_ch->chid);
+		return -EINVAL;
+	}
+
+	gr_mem = nvgpu_gr_ctx_get_ctx_mem(owner_tsg->gr_ctx);
+	if ((gr_mem == NULL) || !nvgpu_mem_is_valid(gr_mem)) {
+		nvgpu_err(g, "invalid context mem");
+		return -EINVAL;
+	}
+	if (gr_mem->size > (u64)UINT_MAX) {
+		nvgpu_err(bound_ch->g, "ctx size is larger than expected");
+		return -EINVAL;
+	}
+
+	args->size = (u32)gr_mem->size;
+	return 0;
+}
+
+/* Copy the gr context image of the bound channel's TSG into the user buffer
+   at args->buffer; args->size must equal the context size. The TSG is
+   disabled and the channel preempted for the copy, then re-enabled. */
+static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
+			struct nvgpu_dbg_gpu_get_gr_context_args *args)
+{
+	struct gk20a *g = dbg_s->g;
+	struct nvgpu_channel *ch;
+	struct nvgpu_tsg *tsg;
+	struct nvgpu_mem *ctx_mem;
+	void __user *user_buffer = (void __user *)(uintptr_t)args->buffer;
+	u32 size;
+	int err = 0, enable_err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_GR_CONTEXT)) {
+		nvgpu_err(g, "get_gr_context is not supported on current config");
+		return -EINVAL;
+	}
+
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch == NULL) {
+		nvgpu_err(g, "no bound channel");
+		return -EINVAL;
+	}
+
+	tsg = nvgpu_tsg_from_ch(ch);
+	if (tsg == NULL) {
+		nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
+		return -EINVAL;
+	}
+
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
+		nvgpu_err(g, "invalid context mem");
+		return -EINVAL;
+	}
+
+	if (ctx_mem->size > (u64)UINT_MAX) {
+		nvgpu_err(ch->g, "ctx size is larger than expected");
+		return -EINVAL;
+	}
+
+	size = (u32)ctx_mem->size;
+	if (args->size != size) {
+		nvgpu_err(g, "size mismatch: %u != %u", args->size, size);
+		return -EINVAL;
+	}
+
+	if (nvgpu_channel_disable_tsg(g, ch) != 0) {
+		nvgpu_err(g, "failed to disable channel/TSG");
+		return -EINVAL;
+	}
+
+	err = nvgpu_preempt_channel(g, ch);
+	if (err != 0) {
+		nvgpu_err(g, "failed to preempt channel/TSG");
+		goto done;
+	}
+
+	/* Channel gr_ctx buffer is gpu cacheable.
+	   Flush and invalidate before the cpu reads it. */
+	err = g->ops.mm.cache.l2_flush(g, true);
+	if (err != 0) {
+		nvgpu_err(g, "l2_flush failed");
+		goto done;
+	}
+
+	err = nvgpu_dbg_get_context_buffer(g, ctx_mem, user_buffer, size);
+
+done:
+	enable_err = nvgpu_channel_enable_tsg(g, ch);
+	if (enable_err != 0) {
+		nvgpu_err(g, "failed to re-enable channel/TSG");
+	}
+	return (err != 0) ? err : enable_err;
+}
+
+/* Copy ctx_buf_size bytes of ctx_mem to the user buffer ctx_buf. Sysmem is
+   copied straight from its CPU mapping; otherwise (dGPU builds only) the
+   data is staged through a temporary buffer, access_limit_size bytes at a
+   time. Returns 0, -EINVAL, -ENOMEM, or -EFAULT if the user copy faults. */
+static int nvgpu_dbg_get_context_buffer(struct gk20a *g, struct nvgpu_mem *ctx_mem,
+			void __user *ctx_buf, u32 ctx_buf_size)
+{
+	int err = 0;
+#ifdef CONFIG_NVGPU_DGPU
+	void *buffer = NULL;
+	u32 size, access_size;
+	u32 access_limit_size = SZ_4K;
+	u32 offset = 0;
+#endif
+
+	if (ctx_mem->aperture == APERTURE_SYSMEM) {
+		if (ctx_mem->cpu_va == NULL) {
+			nvgpu_err(g, "CPU pointer is NULL. Note that this feature is "
+				"currently not supported on virtual GPU.");
+			err = -EINVAL;
+		} else if (copy_to_user(ctx_buf, ctx_mem->cpu_va, ctx_buf_size) != 0) {
+			/* copy_to_user() returns bytes left uncopied, not an errno */
+			err = -EFAULT;
+		}
+	}
+#ifdef CONFIG_NVGPU_DGPU
+	else {
+		/* The caller already checked nvgpu_mem_is_valid, so ctx_mem->aperture
+		   must be APERTURE_VIDMEM if we reach here */
+		buffer = nvgpu_big_zalloc(g, access_limit_size);
+		if (buffer == NULL) {
+			return -ENOMEM;
+		}
+
+		size = ctx_buf_size;
+		while (size > 0) {
+			/* Max access size of access_limit_size in one loop */
+			access_size = min(access_limit_size, size);
+			nvgpu_mem_rd_n(g, ctx_mem, offset, buffer, access_size);
+			if (copy_to_user(ctx_buf + offset, buffer, access_size) != 0) {
+				err = -EFAULT;
+				break;
+			}
+
+			size -= access_size;
+			offset += access_size;
+		}
+
+		nvgpu_big_free(g, buffer);
+	}
+#endif
+
+	return err;
+}
+
 static int gk20a_perfbuf_release_locked(struct gk20a *g,
 		struct dbg_session_gk20a *dbg_s, u64 offset)
 {
@@ -2121,6 +2304,16 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			   (struct nvgpu_dbg_gpu_timeout_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE:
+		err = nvgpu_dbg_gpu_ioctl_get_gr_context_size(dbg_s,
+			(struct nvgpu_dbg_gpu_get_gr_context_size_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT:
+		err = nvgpu_dbg_gpu_ioctl_get_gr_context(dbg_s,
+			(struct nvgpu_dbg_gpu_get_gr_context_args *)buf);
+		break;
+
 	case NVGPU_DBG_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
 		err = nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(dbg_s,
 		   (struct nvgpu_dbg_gpu_read_single_sm_error_state_args *)buf);
@@ -2184,7 +2377,6 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		   (struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args *)buf);
 		break;
 
-
 	default:
 		nvgpu_err(g,
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 0a7ef2ab5..ea24b4d26 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -197,6 +197,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_PROFILER_V2_CONTEXT	(1ULL << 47)
 /* Profiling SMPC in global mode is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_SMPC_GLOBAL_MODE	(1ULL << 48)
+/* Retrieving contents of graphics context is supported */
+#define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT	    (1ULL << 49)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF	(1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1505,8 +1507,30 @@ struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args {
 	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 26, \
 	struct nvgpu_dbg_gpu_set_ctx_mmu_debug_mode_args)
 
+/* Get gr context size */
+struct nvgpu_dbg_gpu_get_gr_context_size_args {
+	__u32 size;      /* out: size of the gr context buffer, in bytes */
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT_SIZE \
+	_IOR(NVGPU_DBG_GPU_IOCTL_MAGIC, 27, \
+	struct nvgpu_dbg_gpu_get_gr_context_size_args)
+
+/* Get gr context */
+struct nvgpu_dbg_gpu_get_gr_context_args {
+	__u64 buffer;    /* in: user address of the buffer that receives the gr
+	                    context contents; the kernel writes through it */
+	__u32 size;      /* in: buffer size in bytes; must equal the gr context size */
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT \
+	_IOW(NVGPU_DBG_GPU_IOCTL_MAGIC, 28, \
+	struct nvgpu_dbg_gpu_get_gr_context_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_SET_CTX_MMU_DEBUG_MODE)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_GET_GR_CONTEXT)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE		\
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)