gpu: nvgpu: gk20a: Allow regops lists longer than 128
Process long regops lists in 4-kB fragments, overcoming the overly
low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024
and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 98e349ab7e
commit 425f99335b
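The pattern the commit message describes -- reusing one fixed 4-kB scratch buffer instead of allocating a buffer sized to the whole list -- can be sketched in ordinary userspace C. This is an illustrative stand-in only: struct reg_op, exec_one() and the memcpy() calls (standing in for copy_from_user()/copy_to_user()) are made up for the sketch and are not the driver's types or API.

#include <stdio.h>
#include <string.h>

/*
 * Illustrative sketch only, not driver code: process a long list
 * through one fixed 4-kB scratch buffer, fragment by fragment.
 */
struct reg_op { unsigned int offset; unsigned int value; };

#define BUF_BYTES 4096u
#define BUF_OPS   (BUF_BYTES / sizeof(struct reg_op))

static void exec_one(struct reg_op *op)
{
	op->value = ~op->offset;	/* fake "register read" */
}

static void exec_reg_ops_fragmented(struct reg_op *list, size_t num_ops)
{
	static struct reg_op buf[BUF_OPS];	/* fixed scratch buffer */
	size_t ops_offset = 0;

	while (ops_offset < num_ops) {
		size_t n = num_ops - ops_offset;

		if (n > BUF_OPS)
			n = BUF_OPS;	/* clamp to one fragment */

		memcpy(buf, list + ops_offset, n * sizeof(buf[0]));
		for (size_t i = 0; i < n; i++)
			exec_one(&buf[i]);
		/* write results back before fetching the next fragment */
		memcpy(list + ops_offset, buf, n * sizeof(buf[0]));

		ops_offset += n;
	}
}

int main(void)
{
	struct reg_op list[1000] = { { 0, 0 } };

	for (size_t i = 0; i < 1000; i++)
		list[i].offset = (unsigned int)i;
	exec_reg_ops_fragmented(list, 1000);
	printf("last value: 0x%x\n", list[999].value);
	return 0;
}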
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	struct device *dev = dbg_s->dev;
 	struct gk20a *g = get_gk20a(dbg_s->dev);
-	struct nvgpu_dbg_gpu_reg_op *ops;
 	struct channel_gk20a *ch;
-	u64 ops_size = sizeof(ops[0]) * args->num_ops;

-	if (args->num_ops > SZ_4K / sizeof(ops[0]))
-		return -EINVAL;
+	gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);

-	gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
+	if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
+		gk20a_err(dev, "regops limit exceeded");
+		return -EINVAL;
+	}
+
+	if (args->num_ops == 0) {
+		/* Nothing to do */
+		return 0;
+	}
+
+	if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
+		gk20a_err(dev, "reg ops work buffer not allocated");
+		return -ENODEV;
+	}

 	if (!dbg_s->id) {
 		gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
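For context on the numbers: the removed check capped a list at SZ_4K / sizeof(ops[0]) entries. Assuming the 32-byte struct nvgpu_dbg_gpu_reg_op layout in the uapi header (4 x __u8 plus 7 x __u32 fields), that is the 128-op limit the commit message mentions, while the new check reads the driver-advertised limit:

    old limit: SZ_4K / sizeof(ops[0]) = 4096 / 32 = 128 ops per IOCTL
    new limit: g->gpu_characteristics.reg_ops_limit = 1024 ops per IOCTL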
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		return -ENODEV;
 	}

-	ops = kzalloc(ops_size, GFP_KERNEL);
-	if (!ops) {
-		gk20a_err(dev, "Allocating memory failed!");
-		return -ENOMEM;
-	}
-
-	gk20a_dbg_fn("Copying regops from userspace");
-
-	if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
-			   ops_size)) {
-		dev_err(dev, "copy_from_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
 	/* since exec_reg_ops sends methods to the ucode, it must take the
 	 * global gpu lock to protect against mixing methods from debug sessions
 	 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	}

 	if (!powergate_err) {
-		err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops,
-				args->num_ops);
+		u64 ops_offset = 0; /* index offset */
+
+		while (ops_offset < args->num_ops && !err) {
+			const u64 num_ops =
+				min(args->num_ops - ops_offset,
+				    (u64)(g->dbg_regops_tmp_buf_ops));
+			const u64 fragment_size =
+				num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
+
+			void __user *const fragment =
+				(void __user *)(uintptr_t)
+				(args->ops +
+				 ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
+
+			gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
+				     ops_offset, num_ops);
+
+			gk20a_dbg_fn("Copying regops from userspace");
+
+			if (copy_from_user(g->dbg_regops_tmp_buf,
+					   fragment, fragment_size)) {
+				dev_err(dev, "copy_from_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			err = g->ops.dbg_session_ops.exec_reg_ops(
+				dbg_s, g->dbg_regops_tmp_buf, num_ops);
+
+			gk20a_dbg_fn("Copying result to userspace");
+
+			if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
+					 fragment_size)) {
+				dev_err(dev, "copy_to_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			ops_offset += num_ops;
+		}
 	}

 	/* enable powergate, if previously disabled */
 	if (is_pg_disabled) {
 		powergate_err =
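The min() against g->dbg_regops_tmp_buf_ops caps each pass at one scratch buffer's worth of ops, and ops_offset advances by exactly the number processed, so every op is visited once. Assuming the 128-op fragment capacity worked out above:

    1024-op list: 8 fragments of 128 ops each
    300-op list:  fragments of 128, 128 and 44 ops

Because results are copied back per fragment, a failure mid-list leaves the already-completed fragments written back to userspace while err reports the failure.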
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	if (!err && powergate_err)
 		err = powergate_err;

-	if (err) {
+	if (err)
 		gk20a_err(dev, "dbg regops failed");
-		goto clean_up;
-	}
-
-	gk20a_dbg_fn("Copying result to userspace");
-
-	if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
-		dev_err(dev, "copy_to_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
-clean_up:
-	kfree(ops);
 	return err;
 }
@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
 #ifdef CONFIG_TEGRA_COMMON
 	tegra_unregister_idle_unidle();
 #endif
+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);

 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->pci_class = g->pci_class;
 	gpu->pci_revision = g->pci_revision;

+	gpu->reg_ops_limit = 1024;
+
 	return 0;
 }
@@ -890,6 +890,10 @@ struct gk20a {
 	int dbg_powergating_disabled_refcount; /*refcount for pg disable */
 	int dbg_timeout_disabled_refcount; /*refcount for timeout disable */

+	/* must have dbg_sessions_lock before use */
+	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
+	u32 dbg_regops_tmp_buf_ops;
+
 	/*
 	 * When set subsequent VMAs will separate fixed and non-fixed
 	 * allocations. This avoids conflicts with fixed and non-fixed allocs
@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
 	gk20a_create_sysfs(g->dev);
 	gk20a_debug_init(g->dev, debugfs_symlink);

+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = gk20a_remove_support;

 	return 0;
@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
 	struct tegra_vgpu_intr_msg msg;
 	int err;

+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);
+
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
 	mutex_init(&g->client_lock);
 	mutex_init(&g->ch_wdt_lock);

+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = vgpu_remove_support;
 	return 0;
@@ -220,6 +220,11 @@ struct nvgpu_gpu_characteristics {
 	__u8 vbios_oem_version;
 	__u32 vbios_version;

+	/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
+	 * of regops */
+	__u32 reg_ops_limit;
+	__u32 reserved1;
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
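On the userspace side, a client can read reg_ops_limit from the characteristics and split an oversized request into several NVGPU_DBG_GPU_IOCTL_REG_OPS calls itself. A minimal sketch, assuming <linux/nvgpu.h> is on the include path and that submit_reg_ops() is a hypothetical wrapper around the reg-ops ioctl (fetching the characteristics is not shown):

#include <stddef.h>
#include <linux/nvgpu.h>	/* struct nvgpu_dbg_gpu_reg_op */

/* hypothetical wrapper around NVGPU_DBG_GPU_IOCTL_REG_OPS */
extern int submit_reg_ops(int dbg_fd, struct nvgpu_dbg_gpu_reg_op *ops,
			  unsigned int num_ops);

/*
 * Submit an arbitrarily long reg-op list in batches that each respect
 * the driver-reported limit. A zero reg_ops_limit (driver predating
 * this change) falls back to the historical 128-op limit.
 */
int submit_reg_ops_batched(int dbg_fd, struct nvgpu_dbg_gpu_reg_op *ops,
			   size_t num_ops, unsigned int reg_ops_limit)
{
	const size_t batch_max = reg_ops_limit ? reg_ops_limit : 128;
	size_t done = 0;

	while (done < num_ops) {
		size_t n = num_ops - done;
		int err;

		if (n > batch_max)
			n = batch_max;

		err = submit_reg_ops(dbg_fd, ops + done, (unsigned int)n);
		if (err)
			return err;	/* ops before 'done' already ran */

		done += n;
	}
	return 0;
}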