gpu: nvgpu: support tuning per-ch deterministic opts

Add a new ioctl NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS to adjust
deterministic options on a per-channel basis. Currently, the only
supported option is to relax the no-railgating requirement on open
deterministic channels. This also disallows submits on such channels,
until the railgate option is reset.

Bug 200327089

Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2017-11-06 14:25:47 +02:00
committed by mobile promotions
parent 1480afeb01
commit 8bdce5337e
7 changed files with 179 additions and 3 deletions

View File

@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
if (c->deterministic)
nvgpu_rwsem_down_read(&g->deterministic_busy);
if (c->deterministic && c->deterministic_railgate_allowed) {
/*
* Nope - this channel has dropped its own power ref. As
* deterministic submits don't hold power on per each submitted
* job like normal ones do, the GPU might railgate any time now
* and thus submit is disallowed.
*/
err = -EINVAL;
goto clean_up;
}
trace_gk20a_channel_submit_gpfifo(g->name,
c->chid,
num_entries,

View File

@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
NVGPU_SUPPORT_DETERMINISTIC_OPTS},
{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
NVGPU_SUPPORT_IO_COHERENCE},
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
return err;
}
/*
 * Apply the railgating option in @flags to one deterministic channel.
 *
 * @ch:    channel whose deterministic_railgate_allowed state is updated
 * @flags: NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_* bits from userspace
 *
 * Returns 0 on success (including when nothing had to change), -EINVAL if
 * both the allow and the disallow flag are set, or the error from
 * gk20a_busy() if the channel could not be taken back out of explicit idle.
 */
static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
		u32 flags)
{
	const bool want_allow = (flags &
		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING) != 0;
	const bool want_disallow = (flags &
		NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING) != 0;
	int ret;

	/* Contradictory request: the two flags are mutually exclusive */
	if (want_allow && want_disallow)
		return -EINVAL;

	/* Neither flag given: leave the channel exactly as it is */
	if (!want_allow && !want_disallow)
		return 0;

	/*
	 * Exactly one flag is set from here on. Only an actual state change
	 * touches the busy/idle balance; repeating the current state falls
	 * through to the (redundant) store below.
	 */
	if (want_allow) {
		/* Entering explicit idle: pair with the busy held so far */
		if (!ch->deterministic_railgate_allowed)
			gk20a_idle(ch->g);
	} else if (ch->deterministic_railgate_allowed) {
		/* Leaving explicit idle: take the busy reference back */
		ret = gk20a_busy(ch->g);
		if (ret) {
			nvgpu_warn(ch->g,
				"cannot busy to restore deterministic ch");
			return ret;
		}
	}

	ch->deterministic_railgate_allowed = want_allow;

	return 0;
}
/*
 * Apply deterministic options to a single channel.
 *
 * Only channels marked deterministic are accepted; any other channel is
 * rejected with -EINVAL. Otherwise the result of the railgate option update
 * is returned.
 */
static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
{
	if (ch->deterministic)
		return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);

	return -EINVAL;
}
static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
struct nvgpu_gpu_set_deterministic_opts_args *args)
{
int __user *user_channels;
u32 i = 0;
int err = 0;
gk20a_dbg_fn("");
user_channels = (int __user *)(uintptr_t)args->channels;
/* Upper limit; prevent holding deterministic_busy for long */
if (args->num_channels > g->fifo.num_channels) {
err = -EINVAL;
goto out;
}
/* Trivial sanity check first */
if (!access_ok(VERIFY_READ, user_channels,
args->num_channels * sizeof(int))) {
err = -EFAULT;
goto out;
}
nvgpu_rwsem_down_read(&g->deterministic_busy);
/* note: we exit at the first failure */
for (; i < args->num_channels; i++) {
int ch_fd = 0;
struct channel_gk20a *ch;
if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
/* User raced with above access_ok */
err = -EFAULT;
break;
}
ch = gk20a_get_channel_from_file(ch_fd);
if (!ch) {
err = -EINVAL;
break;
}
err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
gk20a_channel_put(ch);
if (err)
break;
}
nvgpu_rwsem_up_read(&g->deterministic_busy);
out:
args->num_channels = i;
return err;
}
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
break;
case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
err = nvgpu_gpu_set_deterministic_opts(g,
(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
break;
default:
gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
err = -ENOTTY;

View File

@@ -578,7 +578,10 @@ unbind:
if (ch->deterministic) {
nvgpu_rwsem_down_read(&g->deterministic_busy);
ch->deterministic = false;
if (!ch->deterministic_railgate_allowed)
gk20a_idle(g);
ch->deterministic_railgate_allowed = false;
nvgpu_rwsem_up_read(&g->deterministic_busy);
}

View File

@@ -208,6 +208,8 @@ struct channel_gk20a {
bool first_init;
bool vpr;
bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */
bool deterministic_railgate_allowed;
bool cde;
pid_t pid;
pid_t tgid;

View File

@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);

View File

@@ -91,6 +91,9 @@ struct gk20a;
/* FECS context switch tracing is available */
#define NVGPU_SUPPORT_FECS_CTXSW_TRACE 38
/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
#define NVGPU_SUPPORT_DETERMINISTIC_OPTS 39
/*
* Security flags
*/

View File

@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
/* Direct PTE kind control is supported (map_buffer_ex) */
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23)
/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS (1ULL << 24)
struct nvgpu_gpu_characteristics {
__u32 arch;
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
__s32 temp_f24_8;
};
/*
* Adjust options of deterministic channels in channel batches
* (argument block of NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS).
*
* This supports only one option currently: relax railgate blocking by
* "disabling" the channel.
*
* Open deterministic channels do not allow the GPU to railgate by default. It
* may be preferable to hold preopened channel contexts open and idle and still
* railgate the GPU, taking the channels back into use dynamically in userspace
* as an optimization. This ioctl allows dropping or reacquiring the
* requirement to hold GPU power on for individual channels. While
* NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING is in effect on a
* channel, no work can be submitted to it; submits fail with -EINVAL until
* the channel is switched back with
* NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING.
*
* The channels are processed in array order and processing stops at the
* first failure. num_channels is updated to signify how many channels were
* updated successfully. It can be used to test which was the first update to
* fail.
*/
struct nvgpu_gpu_set_deterministic_opts_args {
/*
* in: number of entries in the channels array; the call fails with -EINVAL
* if this exceeds the driver's channel count.
* out: number of channels updated successfully.
*/
__u32 num_channels; /* in/out */
/*
* Set or unset the railgating reference held by deterministic channels. If
* the channel status is already the same as the flag, this is a no-op. Both
* of these flags cannot be set at the same time (-EINVAL). If none are set,
* the state is left as is.
*/
#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING (1 << 0)
#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1)
/* The same flags are applied to every channel in the batch. */
__u32 flags; /* in */
/*
* This is a userspace pointer to an array of num_channels ints, each a
* channel fd.
*
* The channels have to be valid fds and be previously set as
* deterministic; otherwise the call fails with -EINVAL at that entry.
*/
__u64 channels; /* in */
};
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
#define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
struct nvgpu_gpu_set_therm_alert_limit_args)
#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
struct nvgpu_gpu_set_deterministic_opts_args)
#define NVGPU_GPU_IOCTL_LAST \
_IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT)
_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)