gpu: nvgpu: gk20a: Allow regops lists longer than 128

Process long regops lists in 4-kB fragments, overcoming the overly
low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024
and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sami Kiminki
2016-11-15 21:03:14 +02:00
committed by mobile promotions
parent 98e349ab7e
commit 425f99335b
6 changed files with 88 additions and 34 deletions

View File

@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
struct device *dev = dbg_s->dev;
struct gk20a *g = get_gk20a(dbg_s->dev);
struct nvgpu_dbg_gpu_reg_op *ops;
struct channel_gk20a *ch;
u64 ops_size = sizeof(ops[0]) * args->num_ops;
if (args->num_ops > SZ_4K / sizeof(ops[0])) gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
gk20a_err(dev, "regops limit exceeded");
return -EINVAL; return -EINVAL;
}
gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); if (args->num_ops == 0) {
/* Nothing to do */
return 0;
}
if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
gk20a_err(dev, "reg ops work buffer not allocated");
return -ENODEV;
}
if (!dbg_s->id) {
	gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
	return -ENODEV;
}
ops = kzalloc(ops_size, GFP_KERNEL);
if (!ops) {
gk20a_err(dev, "Allocating memory failed!");
return -ENOMEM;
}
gk20a_dbg_fn("Copying regops from userspace");
if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
ops_size)) {
dev_err(dev, "copy_from_user failed!");
err = -EFAULT;
goto clean_up;
}
/* since exec_reg_ops sends methods to the ucode, it must take the
 * global gpu lock to protect against mixing methods from debug sessions
 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
}
if (!powergate_err) {
err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, u64 ops_offset = 0; /* index offset */
args->num_ops);
while (ops_offset < args->num_ops && !err) {
const u64 num_ops =
min(args->num_ops - ops_offset,
(u64)(g->dbg_regops_tmp_buf_ops));
const u64 fragment_size =
num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
void __user *const fragment =
(void __user *)(uintptr_t)
(args->ops +
ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
ops_offset, num_ops);
gk20a_dbg_fn("Copying regops from userspace");
if (copy_from_user(g->dbg_regops_tmp_buf,
fragment, fragment_size)) {
dev_err(dev, "copy_from_user failed!");
err = -EFAULT;
break;
}
err = g->ops.dbg_session_ops.exec_reg_ops(
dbg_s, g->dbg_regops_tmp_buf, num_ops);
gk20a_dbg_fn("Copying result to userspace");
if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
fragment_size)) {
dev_err(dev, "copy_to_user failed!");
err = -EFAULT;
break;
}
ops_offset += num_ops;
}
/* enable powergate, if previously disabled */
if (is_pg_disabled) {
	powergate_err =
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
if (!err && powergate_err)
	err = powergate_err;
if (err) { if (err)
	gk20a_err(dev, "dbg regops failed");
goto clean_up;
}
gk20a_dbg_fn("Copying result to userspace");
if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
dev_err(dev, "copy_to_user failed!");
err = -EFAULT;
goto clean_up;
}
clean_up:
kfree(ops);
	return err;
}

View File

@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
#ifdef CONFIG_TEGRA_COMMON
	tegra_unregister_idle_unidle();
#endif
if (g->dbg_regops_tmp_buf)
kfree(g->dbg_regops_tmp_buf);
if (g->pmu.remove_support)
	g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gpu->pci_class = g->pci_class;
gpu->pci_revision = g->pci_revision;
gpu->reg_ops_limit = 1024;
	return 0;
}

View File

@@ -890,6 +890,10 @@ struct gk20a {
int dbg_powergating_disabled_refcount; /*refcount for pg disable */
int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
/* must have dbg_sessions_lock before use */
struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
u32 dbg_regops_tmp_buf_ops;
/*
 * When set subsequent VMAs will separate fixed and non-fixed
 * allocations. This avoids conflicts with fixed and non-fixed allocs

View File

@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
gk20a_create_sysfs(g->dev);
gk20a_debug_init(g->dev, debugfs_symlink);
g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
if (!g->dbg_regops_tmp_buf) {
dev_err(g->dev, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = gk20a_remove_support;
return 0;

View File

@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
struct tegra_vgpu_intr_msg msg;
int err;
if (g->dbg_regops_tmp_buf)
kfree(g->dbg_regops_tmp_buf);
if (g->pmu.remove_support)
	g->pmu.remove_support(&g->pmu);
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
mutex_init(&g->client_lock);
mutex_init(&g->ch_wdt_lock);
g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
if (!g->dbg_regops_tmp_buf) {
dev_err(g->dev, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = vgpu_remove_support;
return 0;

View File

@@ -220,6 +220,11 @@ struct nvgpu_gpu_characteristics {
__u8 vbios_oem_version;
__u32 vbios_version;
/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
* of regops */
__u32 reg_ops_limit;
__u32 reserved1;
/* Notes:
   - This struct can be safely appended with new fields. However, always
     keep the structure size multiple of 8 and make sure that the binary