mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: support tuning per-ch deterministic opts
Add a new ioctl NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS to adjust deterministic options on a per-channel basis. Currently, the only supported option is to relax the no-railgating requirement on open deterministic channels. This also disallows submits on such channels until the railgate option is reset.

Bug 200327089

Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
1480afeb01
commit
8bdce5337e
@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
if (c->deterministic)
|
if (c->deterministic)
|
||||||
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
||||||
|
|
||||||
|
if (c->deterministic && c->deterministic_railgate_allowed) {
|
||||||
|
/*
|
||||||
|
* Nope - this channel has dropped its own power ref. As
|
||||||
|
* deterministic submits don't hold power on per each submitted
|
||||||
|
* job like normal ones do, the GPU might railgate any time now
|
||||||
|
* and thus submit is disallowed.
|
||||||
|
*/
|
||||||
|
err = -EINVAL;
|
||||||
|
goto clean_up;
|
||||||
|
}
|
||||||
|
|
||||||
trace_gk20a_channel_submit_gpfifo(g->name,
|
trace_gk20a_channel_submit_gpfifo(g->name,
|
||||||
c->chid,
|
c->chid,
|
||||||
num_entries,
|
num_entries,
|
||||||
|
|||||||
@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
|
|||||||
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
|
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING},
|
||||||
{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
|
{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
|
||||||
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
|
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL},
|
||||||
|
{NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS,
|
||||||
|
NVGPU_SUPPORT_DETERMINISTIC_OPTS},
|
||||||
{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
|
{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
|
||||||
NVGPU_SUPPORT_IO_COHERENCE},
|
NVGPU_SUPPORT_IO_COHERENCE},
|
||||||
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
|
{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
|
||||||
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch,
|
||||||
|
u32 flags)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
bool allow;
|
||||||
|
bool disallow;
|
||||||
|
|
||||||
|
allow = flags &
|
||||||
|
NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING;
|
||||||
|
|
||||||
|
disallow = flags &
|
||||||
|
NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING;
|
||||||
|
|
||||||
|
/* Can't be both at the same time */
|
||||||
|
if (allow && disallow)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
/* Nothing to do */
|
||||||
|
if (!allow && !disallow)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Moving into explicit idle or back from it? A call that doesn't
|
||||||
|
* change the status is a no-op.
|
||||||
|
*/
|
||||||
|
if (!ch->deterministic_railgate_allowed &&
|
||||||
|
allow) {
|
||||||
|
gk20a_idle(ch->g);
|
||||||
|
} else if (ch->deterministic_railgate_allowed &&
|
||||||
|
!allow) {
|
||||||
|
err = gk20a_busy(ch->g);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_warn(ch->g,
|
||||||
|
"cannot busy to restore deterministic ch");
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ch->deterministic_railgate_allowed = allow;
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Apply the deterministic options in @flags to a single channel.
 *
 * Only channels already marked deterministic are accepted; any other channel
 * yields -EINVAL. Otherwise the result of the railgate option update is
 * returned as is.
 */
static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags)
{
	if (ch->deterministic)
		return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags);

	return -EINVAL;
}
|
||||||
|
|
||||||
|
static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
|
||||||
|
struct nvgpu_gpu_set_deterministic_opts_args *args)
|
||||||
|
{
|
||||||
|
int __user *user_channels;
|
||||||
|
u32 i = 0;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
user_channels = (int __user *)(uintptr_t)args->channels;
|
||||||
|
|
||||||
|
/* Upper limit; prevent holding deterministic_busy for long */
|
||||||
|
if (args->num_channels > g->fifo.num_channels) {
|
||||||
|
err = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Trivial sanity check first */
|
||||||
|
if (!access_ok(VERIFY_READ, user_channels,
|
||||||
|
args->num_channels * sizeof(int))) {
|
||||||
|
err = -EFAULT;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
||||||
|
|
||||||
|
/* note: we exit at the first failure */
|
||||||
|
for (; i < args->num_channels; i++) {
|
||||||
|
int ch_fd = 0;
|
||||||
|
struct channel_gk20a *ch;
|
||||||
|
|
||||||
|
if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) {
|
||||||
|
/* User raced with above access_ok */
|
||||||
|
err = -EFAULT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
ch = gk20a_get_channel_from_file(ch_fd);
|
||||||
|
if (!ch) {
|
||||||
|
err = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = nvgpu_gpu_set_deterministic_ch(ch, args->flags);
|
||||||
|
|
||||||
|
gk20a_channel_put(ch);
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
||||||
|
|
||||||
|
out:
|
||||||
|
args->num_channels = i;
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||||
{
|
{
|
||||||
struct gk20a_ctrl_priv *priv = filp->private_data;
|
struct gk20a_ctrl_priv *priv = filp->private_data;
|
||||||
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
|||||||
(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
|
(struct nvgpu_gpu_set_therm_alert_limit_args *)buf);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS:
|
||||||
|
err = nvgpu_gpu_set_deterministic_opts(g,
|
||||||
|
(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
|
gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd);
|
||||||
err = -ENOTTY;
|
err = -ENOTTY;
|
||||||
|
|||||||
@@ -578,7 +578,10 @@ unbind:
|
|||||||
if (ch->deterministic) {
|
if (ch->deterministic) {
|
||||||
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
||||||
ch->deterministic = false;
|
ch->deterministic = false;
|
||||||
|
if (!ch->deterministic_railgate_allowed)
|
||||||
gk20a_idle(g);
|
gk20a_idle(g);
|
||||||
|
ch->deterministic_railgate_allowed = false;
|
||||||
|
|
||||||
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -208,6 +208,8 @@ struct channel_gk20a {
|
|||||||
bool first_init;
|
bool first_init;
|
||||||
bool vpr;
|
bool vpr;
|
||||||
bool deterministic;
|
bool deterministic;
|
||||||
|
/* deterministic, but explicitly idle and submits disallowed */
|
||||||
|
bool deterministic_railgate_allowed;
|
||||||
bool cde;
|
bool cde;
|
||||||
pid_t pid;
|
pid_t pid;
|
||||||
pid_t tgid;
|
pid_t tgid;
|
||||||
|
|||||||
@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
|||||||
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
|
NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
|
||||||
true);
|
true);
|
||||||
|
|
||||||
|
__nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true);
|
||||||
|
|
||||||
__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
|
__nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true);
|
||||||
__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
|
__nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true);
|
||||||
|
|
||||||
|
|||||||
@@ -91,6 +91,9 @@ struct gk20a;
|
|||||||
/* FECS context switch tracing is available */
|
/* FECS context switch tracing is available */
|
||||||
#define NVGPU_SUPPORT_FECS_CTXSW_TRACE 38
|
#define NVGPU_SUPPORT_FECS_CTXSW_TRACE 38
|
||||||
|
|
||||||
|
/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
|
||||||
|
#define NVGPU_SUPPORT_DETERMINISTIC_OPTS 39
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Security flags
|
* Security flags
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args {
|
|||||||
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
|
#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
|
||||||
/* Direct PTE kind control is supported (map_buffer_ex) */
|
/* Direct PTE kind control is supported (map_buffer_ex) */
|
||||||
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23)
|
#define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23)
|
||||||
|
/* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */
|
||||||
|
#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS (1ULL << 24)
|
||||||
|
|
||||||
struct nvgpu_gpu_characteristics {
|
struct nvgpu_gpu_characteristics {
|
||||||
__u32 arch;
|
__u32 arch;
|
||||||
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
|
|||||||
__s32 temp_f24_8;
|
__s32 temp_f24_8;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Adjust options of deterministic channels in channel batches.
|
||||||
|
*
|
||||||
|
* This supports only one option currently: relax railgate blocking by
|
||||||
|
* "disabling" the channel.
|
||||||
|
*
|
||||||
|
* Open deterministic channels do not allow the GPU to railgate by default. It
|
||||||
|
* may be preferable to hold preopened channel contexts open and idle and still
|
||||||
|
* railgate the GPU, taking the channels back into use dynamically in userspace
|
||||||
|
* as an optimization. This ioctl allows to drop or reacquire the requirement
|
||||||
|
* to hold GPU power on for individual channels. If allow_railgate is set on a
|
||||||
|
* channel, no work can be submitted to it.
|
||||||
|
*
|
||||||
|
* num_channels is updated to signify how many channels were updated
|
||||||
|
* successfully. It can be used to test which was the first update to fail.
|
||||||
|
*/
|
||||||
|
struct nvgpu_gpu_set_deterministic_opts_args {
|
||||||
|
__u32 num_channels; /* in/out */
|
||||||
|
/*
|
||||||
|
* Set or unset the railgating reference held by deterministic channels. If
|
||||||
|
* the channel status is already the same as the flag, this is a no-op. Both
|
||||||
|
* of these flags cannot be set at the same time. If none are set, the state
|
||||||
|
* is left as is.
|
||||||
|
*/
|
||||||
|
#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING (1 << 0)
|
||||||
|
#define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1)
|
||||||
|
__u32 flags; /* in */
|
||||||
|
/*
|
||||||
|
* This is a pointer to an array of size num_channels.
|
||||||
|
*
|
||||||
|
* The channels have to be valid fds and be previously set as
|
||||||
|
* deterministic.
|
||||||
|
*/
|
||||||
|
__u64 channels; /* in */
|
||||||
|
};
|
||||||
|
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
||||||
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
||||||
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args {
|
|||||||
#define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
|
#define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \
|
||||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \
|
||||||
struct nvgpu_gpu_set_therm_alert_limit_args)
|
struct nvgpu_gpu_set_therm_alert_limit_args)
|
||||||
|
#define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
|
||||||
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
|
||||||
|
struct nvgpu_gpu_set_deterministic_opts_args)
|
||||||
#define NVGPU_GPU_IOCTL_LAST \
|
#define NVGPU_GPU_IOCTL_LAST \
|
||||||
_IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT)
|
_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
|
||||||
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
||||||
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user