gpu: nvgpu: gk20a: Allow regops lists longer than 128
Process long regops lists in 4-kB fragments, overcoming the overly
low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024
and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 98e349ab7e
commit 425f99335b
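The pattern the commit message describes -- reusing one fixed 4-kB scratch buffer instead of allocating a buffer sized to the whole list -- can be sketched in ordinary userspace C. This is an illustrative stand-in only: struct reg_op, exec_one() and the memcpy() calls (standing in for copy_from_user()/copy_to_user()) are made up for the sketch and are not the driver's types or API.

#include <stdio.h>
#include <string.h>

/*
 * Illustrative sketch only, not driver code: process a long list
 * through one fixed 4-kB scratch buffer, fragment by fragment.
 */
struct reg_op { unsigned int offset; unsigned int value; };

#define BUF_BYTES 4096u
#define BUF_OPS   (BUF_BYTES / sizeof(struct reg_op))

static void exec_one(struct reg_op *op)
{
	op->value = ~op->offset;	/* fake "register read" */
}

static void exec_reg_ops_fragmented(struct reg_op *list, size_t num_ops)
{
	static struct reg_op buf[BUF_OPS];	/* fixed scratch buffer */
	size_t ops_offset = 0;

	while (ops_offset < num_ops) {
		size_t n = num_ops - ops_offset;

		if (n > BUF_OPS)
			n = BUF_OPS;	/* clamp to one fragment */

		memcpy(buf, list + ops_offset, n * sizeof(buf[0]));
		for (size_t i = 0; i < n; i++)
			exec_one(&buf[i]);
		/* write results back before fetching the next fragment */
		memcpy(list + ops_offset, buf, n * sizeof(buf[0]));

		ops_offset += n;
	}
}

int main(void)
{
	struct reg_op list[1000] = { { 0, 0 } };

	for (size_t i = 0; i < 1000; i++)
		list[i].offset = (unsigned int)i;
	exec_reg_ops_fragmented(list, 1000);
	printf("last value: 0x%x\n", list[999].value);
	return 0;
}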
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	struct device *dev = dbg_s->dev;
 	struct gk20a *g = get_gk20a(dbg_s->dev);
-	struct nvgpu_dbg_gpu_reg_op *ops;
 	struct channel_gk20a *ch;
-	u64 ops_size = sizeof(ops[0]) * args->num_ops;

-	if (args->num_ops > SZ_4K / sizeof(ops[0]))
-		return -EINVAL;
+	gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);

-	gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
+	if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
+		gk20a_err(dev, "regops limit exceeded");
+		return -EINVAL;
+	}
+
+	if (args->num_ops == 0) {
+		/* Nothing to do */
+		return 0;
+	}
+
+	if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
+		gk20a_err(dev, "reg ops work buffer not allocated");
+		return -ENODEV;
+	}

 	if (!dbg_s->id) {
 		gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
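For context on the numbers: the removed check capped a list at SZ_4K / sizeof(ops[0]) entries. Assuming the 32-byte struct nvgpu_dbg_gpu_reg_op layout in the uapi header (4 x __u8 plus 7 x __u32 fields), that is the 128-op limit the commit message mentions, while the new check reads the driver-advertised limit:

    old limit: SZ_4K / sizeof(ops[0]) = 4096 / 32 = 128 ops per IOCTL
    new limit: g->gpu_characteristics.reg_ops_limit = 1024 ops per IOCTL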
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		return -ENODEV;
 	}

-	ops = kzalloc(ops_size, GFP_KERNEL);
-	if (!ops) {
-		gk20a_err(dev, "Allocating memory failed!");
-		return -ENOMEM;
-	}
-
-	gk20a_dbg_fn("Copying regops from userspace");
-
-	if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
-			   ops_size)) {
-		dev_err(dev, "copy_from_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
 	/* since exec_reg_ops sends methods to the ucode, it must take the
 	 * global gpu lock to protect against mixing methods from debug sessions
 	 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	}

 	if (!powergate_err) {
-		err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops,
-				args->num_ops);
+		u64 ops_offset = 0; /* index offset */
+
+		while (ops_offset < args->num_ops && !err) {
+			const u64 num_ops =
+				min(args->num_ops - ops_offset,
+				    (u64)(g->dbg_regops_tmp_buf_ops));
+			const u64 fragment_size =
+				num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
+
+			void __user *const fragment =
+				(void __user *)(uintptr_t)
+				(args->ops +
+				 ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
+
+			gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
+				     ops_offset, num_ops);
+
+			gk20a_dbg_fn("Copying regops from userspace");
+
+			if (copy_from_user(g->dbg_regops_tmp_buf,
+					   fragment, fragment_size)) {
+				dev_err(dev, "copy_from_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			err = g->ops.dbg_session_ops.exec_reg_ops(
+				dbg_s, g->dbg_regops_tmp_buf, num_ops);
+
+			gk20a_dbg_fn("Copying result to userspace");
+
+			if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
+					 fragment_size)) {
+				dev_err(dev, "copy_to_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			ops_offset += num_ops;
+		}
 	}

 	/* enable powergate, if previously disabled */
 	if (is_pg_disabled) {
 		powergate_err =
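The min() against g->dbg_regops_tmp_buf_ops caps each pass at one scratch buffer's worth of ops, and ops_offset advances by exactly the number processed, so every op is visited once. Assuming the 128-op fragment capacity worked out above:

    1024-op list: 8 fragments of 128 ops each
    300-op list:  fragments of 128, 128 and 44 ops

Because results are copied back per fragment, a failure mid-list leaves the already-completed fragments written back to userspace while err reports the failure.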
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	if (!err && powergate_err)
 		err = powergate_err;

-	if (err) {
+	if (err)
 		gk20a_err(dev, "dbg regops failed");
-		goto clean_up;
-	}
-
-	gk20a_dbg_fn("Copying result to userspace");
-
-	if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
-		dev_err(dev, "copy_to_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
-clean_up:
-	kfree(ops);
 	return err;
 }
@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
 #ifdef CONFIG_TEGRA_COMMON
 	tegra_unregister_idle_unidle();
 #endif
+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);

 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->pci_class = g->pci_class;
 	gpu->pci_revision = g->pci_revision;

+	gpu->reg_ops_limit = 1024;
+
 	return 0;
 }
@@ -890,6 +890,10 @@ struct gk20a {
 	int dbg_powergating_disabled_refcount; /*refcount for pg disable */
 	int dbg_timeout_disabled_refcount; /*refcount for timeout disable */

+	/* must have dbg_sessions_lock before use */
+	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
+	u32 dbg_regops_tmp_buf_ops;
+
 	/*
 	 * When set subsequent VMAs will separate fixed and non-fixed
 	 * allocations. This avoids conflicts with fixed and non-fixed allocs
@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
 	gk20a_create_sysfs(g->dev);
 	gk20a_debug_init(g->dev, debugfs_symlink);

+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = gk20a_remove_support;

 	return 0;
@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
 	struct tegra_vgpu_intr_msg msg;
 	int err;

+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);
+
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
 	mutex_init(&g->client_lock);
 	mutex_init(&g->ch_wdt_lock);

+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = vgpu_remove_support;
 	return 0;
@@ -220,6 +220,11 @@ struct nvgpu_gpu_characteristics {
 	__u8 vbios_oem_version;
 	__u32 vbios_version;

+	/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
+	 * of regops */
+	__u32 reg_ops_limit;
+	__u32 reserved1;
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
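On the userspace side, a client can read reg_ops_limit from the characteristics and split an oversized request into several NVGPU_DBG_GPU_IOCTL_REG_OPS calls itself. A minimal sketch, assuming <linux/nvgpu.h> is on the include path and that submit_reg_ops() is a hypothetical wrapper around the reg-ops ioctl (fetching the characteristics is not shown):

#include <stddef.h>
#include <linux/nvgpu.h>	/* struct nvgpu_dbg_gpu_reg_op */

/* hypothetical wrapper around NVGPU_DBG_GPU_IOCTL_REG_OPS */
extern int submit_reg_ops(int dbg_fd, struct nvgpu_dbg_gpu_reg_op *ops,
			  unsigned int num_ops);

/*
 * Submit an arbitrarily long reg-op list in batches that each respect
 * the driver-reported limit. A zero reg_ops_limit (driver predating
 * this change) falls back to the historical 128-op limit.
 */
int submit_reg_ops_batched(int dbg_fd, struct nvgpu_dbg_gpu_reg_op *ops,
			   size_t num_ops, unsigned int reg_ops_limit)
{
	const size_t batch_max = reg_ops_limit ? reg_ops_limit : 128;
	size_t done = 0;

	while (done < num_ops) {
		size_t n = num_ops - done;
		int err;

		if (n > batch_max)
			n = batch_max;

		err = submit_reg_ops(dbg_fd, ops + done, (unsigned int)n);
		if (err)
			return err;	/* ops before 'done' already ran */

		done += n;
	}
	return 0;
}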