gpu: nvgpu: gk20a: Allow regops lists longer than 128

Process long regops lists in 4-kB fragments, overcoming the overly
low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024
and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sami Kiminki
2016-11-15 21:03:14 +02:00
committed by mobile promotions
parent 98e349ab7e
commit 425f99335b
6 changed files with 88 additions and 34 deletions

View File

@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
struct device *dev = dbg_s->dev;
struct gk20a *g = get_gk20a(dbg_s->dev);
struct nvgpu_dbg_gpu_reg_op *ops;
struct channel_gk20a *ch;
u64 ops_size = sizeof(ops[0]) * args->num_ops;
if (args->num_ops > SZ_4K / sizeof(ops[0])) gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
gk20a_err(dev, "regops limit exceeded");
return -EINVAL; return -EINVAL;
}
gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); if (args->num_ops == 0) {
/* Nothing to do */
return 0;
}
if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
gk20a_err(dev, "reg ops work buffer not allocated");
return -ENODEV;
}
if (!dbg_s->id) {
	gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
	return -ENODEV;
}
ops = kzalloc(ops_size, GFP_KERNEL);
if (!ops) {
gk20a_err(dev, "Allocating memory failed!");
return -ENOMEM;
}
gk20a_dbg_fn("Copying regops from userspace");
if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
ops_size)) {
dev_err(dev, "copy_from_user failed!");
err = -EFAULT;
goto clean_up;
}
/* since exec_reg_ops sends methods to the ucode, it must take the
 * global gpu lock to protect against mixing methods from debug sessions
 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
}
if (!powergate_err) {
err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, u64 ops_offset = 0; /* index offset */
args->num_ops);
while (ops_offset < args->num_ops && !err) {
const u64 num_ops =
min(args->num_ops - ops_offset,
(u64)(g->dbg_regops_tmp_buf_ops));
const u64 fragment_size =
num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
void __user *const fragment =
(void __user *)(uintptr_t)
(args->ops +
ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
ops_offset, num_ops);
gk20a_dbg_fn("Copying regops from userspace");
if (copy_from_user(g->dbg_regops_tmp_buf,
fragment, fragment_size)) {
dev_err(dev, "copy_from_user failed!");
err = -EFAULT;
break;
}
err = g->ops.dbg_session_ops.exec_reg_ops(
dbg_s, g->dbg_regops_tmp_buf, num_ops);
gk20a_dbg_fn("Copying result to userspace");
if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
fragment_size)) {
dev_err(dev, "copy_to_user failed!");
err = -EFAULT;
break;
}
ops_offset += num_ops;
}
/* enable powergate, if previously disabled */
if (is_pg_disabled) {
	powergate_err =
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
if (!err && powergate_err)
	err = powergate_err;
if (err) { if (err)
	gk20a_err(dev, "dbg regops failed");
goto clean_up;
}
gk20a_dbg_fn("Copying result to userspace");
if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
dev_err(dev, "copy_to_user failed!");
err = -EFAULT;
goto clean_up;
}
clean_up:
kfree(ops);
	return err;
}

View File

@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
#ifdef CONFIG_TEGRA_COMMON
	tegra_unregister_idle_unidle();
#endif
if (g->dbg_regops_tmp_buf)
kfree(g->dbg_regops_tmp_buf);
if (g->pmu.remove_support)
	g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gpu->pci_class = g->pci_class;
gpu->pci_revision = g->pci_revision;
gpu->reg_ops_limit = 1024;
	return 0;
}

View File

@@ -890,6 +890,10 @@ struct gk20a {
int dbg_powergating_disabled_refcount; /*refcount for pg disable */
int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
/* must have dbg_sessions_lock before use */
struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
u32 dbg_regops_tmp_buf_ops;
/*
 * When set subsequent VMAs will separate fixed and non-fixed
 * allocations. This avoids conflicts with fixed and non-fixed allocs

View File

@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
gk20a_create_sysfs(g->dev);
gk20a_debug_init(g->dev, debugfs_symlink);
g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
if (!g->dbg_regops_tmp_buf) {
dev_err(g->dev, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = gk20a_remove_support;
return 0;

View File

@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
struct tegra_vgpu_intr_msg msg;
int err;
if (g->dbg_regops_tmp_buf)
kfree(g->dbg_regops_tmp_buf);
if (g->pmu.remove_support)
	g->pmu.remove_support(&g->pmu);
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
mutex_init(&g->client_lock);
mutex_init(&g->ch_wdt_lock);
g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
if (!g->dbg_regops_tmp_buf) {
dev_err(g->dev, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = vgpu_remove_support;
return 0;

View File

@@ -220,6 +220,11 @@ struct nvgpu_gpu_characteristics {
__u8 vbios_oem_version;
__u32 vbios_version;
/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
* of regops */
__u32 reg_ops_limit;
__u32 reserved1;
/* Notes:
   - This struct can be safely appended with new fields. However, always
     keep the structure size multiple of 8 and make sure that the binary