Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: support tuning per-ch deterministic opts
Add a new ioctl, NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS, to adjust
deterministic options on a per-channel basis. Currently, the only
supported option is to relax the no-railgating requirement on open
deterministic channels. This also disallows submits on such channels
until the railgate option is reset.

Bug 200327089

Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 8bdce5337e
parent 1480afeb01
committed by mobile promotions
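
For context, a minimal userspace sketch of how the new ioctl could be driven. This is not part of the change; the helper name, the control-device path, and the way the channel fds were obtained (they must already have been opened as deterministic channels) are assumptions. Only the uapi names come from the diff below.

/*
 * Hypothetical usage sketch, not part of this commit: ask the driver to
 * drop the no-railgating requirement for a batch of idle deterministic
 * channels.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int relax_railgate_block(int ctrl_fd, int *ch_fds, uint32_t n)
{
        struct nvgpu_gpu_set_deterministic_opts_args args;

        memset(&args, 0, sizeof(args));
        args.num_channels = n;  /* in/out: updated count on return */
        args.flags = NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;
        args.channels = (uint64_t)(uintptr_t)ch_fds;  /* array of channel fds */

        /* ctrl_fd is an open GPU control node fd (device path assumed) */
        return ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS, &args);
}

While railgating is allowed on a channel, submits to it fail with -EINVAL (see the gk20a_submit_channel_gpfifo hunk below); issuing the ioctl again with the DISALLOW_RAILGATING flag re-takes the power reference and re-enables submits.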
@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         if (c->deterministic)
                 nvgpu_rwsem_down_read(&g->deterministic_busy);
 
+        if (c->deterministic && c->deterministic_railgate_allowed) {
+                /*
+                 * Nope - this channel has dropped its own power ref. As
+                 * deterministic submits don't hold power on per each submitted
+                 * job like normal ones do, the GPU might railgate any time now
+                 * and thus submit is disallowed.
+                 */
+                err = -EINVAL;
+                goto clean_up;
+        }
+
         trace_gk20a_channel_submit_gpfifo(g->name,
                                           c->chid,
                                           num_entries,
@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
                 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
         {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                 NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
+        {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
+                NVGPU_SUPPORT_DETERMINISTIC_OPTS},
         {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
                 NVGPU_SUPPORT_IO_COHERENCE},
         {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
         return err;
 }
 
+static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
+                u32 flags)
+{
+        int err = 0;
+        bool allow;
+        bool disallow;
+
+        allow = flags &
+                NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;
+
+        disallow = flags &
+                NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;
+
+        /* Can't be both at the same time */
+        if (allow && disallow)
+                return -EINVAL;
+
+        /* Nothing to do */
+        if (!allow && !disallow)
+                return 0;
+
+        /*
+         * Moving into explicit idle or back from it? A call that doesn't
+         * change the status is a no-op.
+         */
+        if (!ch->deterministic_railgate_allowed &&
+                        allow) {
+                gk20a_idle(ch->g);
+        } else if (ch->deterministic_railgate_allowed &&
+                        !allow) {
+                err = gk20a_busy(ch->g);
+                if (err) {
+                        nvgpu_warn(ch->g,
+                                "cannot busy to restore deterministic ch");
+                        return err;
+                }
+        }
+        ch->deterministic_railgate_allowed = allow;
+
+        return err;
+}
+
+static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
+{
+        if (!ch->deterministic)
+                return -EINVAL;
+
+        return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);
+}
+
+static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
+                struct nvgpu_gpu_set_deterministic_opts_args *args)
+{
+        int __user *user_channels;
+        u32 i = 0;
+        int err = 0;
+
+        gk20a_dbg_fn("");
+
+        user_channels = (int __user *)(uintptr_t)args->channels;
+
+        /* Upper limit; prevent holding deterministic_busy for long */
+        if (args->num_channels > g->fifo.num_channels) {
+                err = -EINVAL;
+                goto out;
+        }
+
+        /* Trivial sanity check first */
+        if (!access_ok(VERIFY_READ, user_channels,
+                        args->num_channels * sizeof(int))) {
+                err = -EFAULT;
+                goto out;
+        }
+
+        nvgpu_rwsem_down_read(&g->deterministic_busy);
+
+        /* note: we exit at the first failure */
+        for (; i < args->num_channels; i++) {
+                int ch_fd = 0;
+                struct channel_gk20a *ch;
+
+                if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
+                        /* User raced with above access_ok */
+                        err = -EFAULT;
+                        break;
+                }
+
+                ch = gk20a_get_channel_from_file(ch_fd);
+                if (!ch) {
+                        err = -EINVAL;
+                        break;
+                }
+
+                err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
+
+                gk20a_channel_put(ch);
+
+                if (err)
+                        break;
+        }
+
+        nvgpu_rwsem_up_read(&g->deterministic_busy);
+
+out:
+        args->num_channels = i;
+        return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
         struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
                         (struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
                 break;
 
+        case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
+                err = nvgpu_gpu_set_deterministic_opts(g,
+                        (struct nvgpu_gpu_set_deterministic_opts_args *)buf);
+                break;
+
         default:
                 gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
                 err = -ENOTTY;
@@ -578,7 +578,10 @@ unbind:
         if (ch->deterministic) {
                 nvgpu_rwsem_down_read(&g->deterministic_busy);
                 ch->deterministic = false;
-                gk20a_idle(g);
+                if (!ch->deterministic_railgate_allowed)
+                        gk20a_idle(g);
+                ch->deterministic_railgate_allowed = false;
+
                 nvgpu_rwsem_up_read(&g->deterministic_busy);
         }
 
@@ -208,6 +208,8 @@ struct channel_gk20a {
         bool first_init;
         bool vpr;
         bool deterministic;
+        /* deterministic, but explicitly idle and submits disallowed */
+        bool deterministic_railgate_allowed;
         bool cde;
         pid_t pid;
         pid_t tgid;
@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
                         NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                         true);
 
+        __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
+
         __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
         __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
 
@@ -91,6 +91,9 @@ struct gk20a;
 /* FECS context switch tracing is available */
 #define NVGPU_SUPPORT_FECS_CTXSW_TRACE          38
 
+/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
+#define NVGPU_SUPPORT_DETERMINISTIC_OPTS        39
+
 /*
  * Security flags
  */
@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST      (1ULL << 21)
 /* Direct PTE kind control is supported (map_buffer_ex) */
 #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL    (1ULL << 23)
 
+/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS      (1ULL << 24)
 
 struct nvgpu_gpu_characteristics {
         __u32 arch;
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
         __s32 temp_f24_8;
 };
 
+/*
+ * Adjust options of deterministic channels in channel batches.
+ *
+ * This supports only one option currently: relax railgate blocking by
+ * "disabling" the channel.
+ *
+ * Open deterministic channels do not allow the GPU to railgate by default. It
+ * may be preferable to hold preopened channel contexts open and idle and still
+ * railgate the GPU, taking the channels back into use dynamically in userspace
+ * as an optimization. This ioctl allows to drop or reacquire the requirement
+ * to hold GPU power on for individual channels. If allow_railgate is set on a
+ * channel, no work can be submitted to it.
+ *
+ * num_channels is updated to signify how many channels were updated
+ * successfully. It can be used to test which was the first update to fail.
+ */
+struct nvgpu_gpu_set_deterministic_opts_args {
+        __u32 num_channels; /* in/out */
+/*
+ * Set or unset the railgating reference held by deterministic channels. If
+ * the channel status is already the same as the flag, this is a no-op. Both
+ * of these flags cannot be set at the same time. If none are set, the state
+ * is left as is.
+ */
+#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING    (1 << 0)
+#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1)
+        __u32 flags; /* in */
+/*
+ * This is a pointer to an array of size num_channels.
+ *
+ * The channels have to be valid fds and be previously set as
+ * deterministic.
+ */
+        __u64 channels; /* in */
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
         _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
 #define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
         _IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
                         struct nvgpu_gpu_set_therm_alert_limit_args)
+#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
+        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
+                        struct nvgpu_gpu_set_deterministic_opts_args)
 #define NVGPU_GPU_IOCTL_LAST \
-        _IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT)
+        _IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
         sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
 
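
As the uapi comment above notes, num_channels is updated to the count of successful updates, which doubles as the index of the first failed channel in the passed array. A hedged follow-up to the earlier sketch (helper name hypothetical, same assumptions about ctrl_fd and the channel fds):

/*
 * Hypothetical sketch building on the earlier example: re-take the power
 * requirement for a batch of channels and report the first failed update.
 */
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int resume_deterministic(int ctrl_fd, int *ch_fds, uint32_t n)
{
        struct nvgpu_gpu_set_deterministic_opts_args args = {
                .num_channels = n,
                .flags = NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING,
                .channels = (uint64_t)(uintptr_t)ch_fds,
        };

        if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS, &args)) {
                /* args.num_channels updates succeeded before the failure. */
                if (args.num_channels < n)
                        fprintf(stderr, "update failed at channel fd %d\n",
                                ch_fds[args.num_channels]);
                return -1;
        }
        return 0;
}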