gpu: nvgpu: move set_timeslice to tsg

Moved the following HALs from fifo to tsg:
- set_timeslice
- default_timeslice_us

Renamed:
- gk20a_tsg_set_timeslice -> nvgpu_tsg_set_timeslice
- min_timeslice_us -> tsg_timeslice_min_us
- max_timeslice_us -> tsg_timeslice_max_us

Scaled the timeslice to take the PTIMER clock into account in
nvgpu_runlist_append_tsg.
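
For reference, the scaling works on the microsecond value kept in
tsg->timeslice_us. A minimal sketch of the math, assuming the
scale_ptimer()/ptimer_scalingfactor10x() helpers from <nvgpu/ptimer.h>
behave roughly as below and that PTIMER_REF_FREQ_HZ is the nominal
31250000 Hz used by the unit test (rounding omitted):

/* Illustrative only, not the driver implementation. */
#define PTIMER_REF_FREQ_HZ_SKETCH 31250000U

static u32 ptimer_scalingfactor10x_sketch(u32 ptimer_src_freq)
{
        /* 10x fixed-point ratio of reference to actual PTIMER clock */
        return (u32)(((u64)PTIMER_REF_FREQ_HZ_SKETCH * 10ULL) /
                        (u64)ptimer_src_freq);
}

static u32 scale_ptimer_sketch(u32 timeslice_us, u32 factor10x)
{
        /* identity when PTIMER runs at the reference frequency */
        return (u32)(((u64)timeslice_us * 10ULL) / (u64)factor10x);
}

With ptimer_src_freq at the reference frequency the factor is 10 and the
timeslice passes through unchanged; on other platforms it is rescaled
proportionally before being split into the timeout/scale fields below.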

Removed gk20a_channel_get_timescale_from_timeslice, and instead moved
the timeout and scale computation into the runlist HAL used when
building the TSG entry (sketched below):
- runlist.get_tsg_entry
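
A condensed sketch of that computation as it now appears in the
gk20a/gv11b get_tsg_entry implementations (field limits come from the
ram_rl_entry_* accessors; the helper name here is illustrative):

/*
 * Shift the PTIMER-scaled timeslice right until it fits the timeout
 * field, clamping when the scale field would overflow.
 */
static void rl_split_timeslice_sketch(u32 timeslice,
                u32 max_timeout, u32 max_scale,
                u32 *timeout, u32 *scale)
{
        u32 t = timeslice;
        u32 s = 0U;

        while (t > max_timeout) {
                t >>= 1U;
                s++;
        }
        if (s > max_scale) {
                t = max_timeout;
                s = max_scale;
        }
        *timeout = t;
        *scale = s;
}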

Used ram_rl_entry_* accessors instead of hard-coded values for the
default and max timeslices.
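
For example, passing U32_MAX through a field extractor yields the
widest value the field can hold: on gk20a the timeout field is 8 bits
(per the comment in the removed helper), so
RL_MAX_TIMESLICE_TIMEOUT = ram_rl_entry_timeslice_timeout_v(U32_MAX)
works out to 255, and the clamping loop above always tracks the
hardware header definitions rather than a hand-written constant.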

Added #defines for min, max and default timeslices.
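
As a sanity check of the defaults: NVGPU_TSG_TIMESLICE_DEFAULT_US =
(128U << 3U) = 1024 us reproduces the old hardware default. At the
nominal PTIMER frequency the scaling is an identity, and splitting 1024
against an 8-bit timeout field shifts three times
(1024 -> 512 -> 256 -> 128), giving timeout 128 and scale 3, exactly
the removed NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT/SCALE values.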

Jira NVGPU-3156

Change-Id: I447266c087c47c89cb6a4a7e4f30acf834b758f0
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2100052
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Thomas Fleury
2019-04-12 17:12:07 -07:00
committed by mobile promotions
parent 7a91e5c79b
commit 3fde3ae650
30 changed files with 178 additions and 183 deletions

View File

@@ -586,7 +586,7 @@ u32 nvgpu_ce_create_context(struct gk20a *g,
/* -1 means default channel timeslice value */
if (timeslice != -1) {
err = gk20a_fifo_tsg_set_timeslice(ce_ctx->tsg, timeslice);
err = g->ops.tsg.set_timeslice(ce_ctx->tsg, timeslice);
if (err != 0) {
nvgpu_err(g, "ce: set timesliced failed for CE context");
goto end;

View File

@@ -132,31 +132,6 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
return 0;
}
void gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
unsigned int timeslice_period,
unsigned int *__timeslice_timeout, unsigned int *__timeslice_scale)
{
unsigned int value = scale_ptimer(timeslice_period,
ptimer_scalingfactor10x(g->ptimer_src_freq));
unsigned int shift = 0;
/* value field is 8 bits long */
while (value >= BIT32(8)) {
value >>= 1U;
shift++;
}
/* time slice register is only 18bits long */
if ((value << shift) >= BIT32(19)) {
nvgpu_err(g, "Requested timeslice value is clamped to 18 bits\n");
value = 255;
shift = 10;
}
*__timeslice_timeout = value;
*__timeslice_scale = shift;
}
int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
{
return c->g->ops.runlist.update_for_channel(c->g, c->runlist_id,

View File

@@ -24,6 +24,7 @@
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/runlist.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/bug.h>
#include <nvgpu/dma.h>
#include <nvgpu/rc.h>
@@ -64,6 +65,7 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32);
struct channel_gk20a *ch;
u32 count = 0;
u32 timeslice;
nvgpu_log_fn(f->g, " ");
@@ -73,7 +75,16 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
/* add TSG entry */
nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry);
/*
* timeslice is measured with PTIMER.
* On some platforms, PTIMER is lower than 1GHz.
*/
timeslice = scale_ptimer(tsg->timeslice_us,
ptimer_scalingfactor10x(g->ptimer_src_freq));
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x",
*entries_left,
(*runlist_entry)[0], (*runlist_entry)[1]);

View File

@@ -535,26 +535,33 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
return ret;
}
int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
int nvgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice_us)
{
struct gk20a *g = tsg->g;
nvgpu_log(g, gpu_dbg_sched, "tsgid=%u timeslice=%u us", tsg->tsgid, timeslice);
nvgpu_log(g, gpu_dbg_sched, "tsgid=%u timeslice=%u us",
tsg->tsgid, timeslice_us);
return g->ops.fifo.tsg_set_timeslice(tsg, timeslice);
}
u32 gk20a_tsg_get_timeslice(struct tsg_gk20a *tsg)
{
struct gk20a *g = tsg->g;
if (tsg->timeslice_us == 0U) {
return g->ops.fifo.default_timeslice_us(g);
if (timeslice_us < g->tsg_timeslice_min_us ||
timeslice_us > g->tsg_timeslice_max_us) {
return -EINVAL;
}
tsg->timeslice_us = timeslice_us;
return g->ops.runlist.reload(g, tsg->runlist_id, true, true);
}
u32 nvgpu_tsg_get_timeslice(struct tsg_gk20a *tsg)
{
return tsg->timeslice_us;
}
u32 nvgpu_tsg_default_timeslice_us(struct gk20a *g)
{
return NVGPU_TSG_TIMESLICE_DEFAULT_US;
}
void nvgpu_tsg_enable_sched(struct gk20a *g, struct tsg_gk20a *tsg)
{
nvgpu_fifo_runlist_set_state(g, BIT32(tsg->runlist_id),
@@ -618,9 +625,7 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct tsg_gk20a *tsg, pid_t pid)
tsg->vm = NULL;
tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
tsg->timeslice_us = 0U;
tsg->timeslice_timeout = 0U;
tsg->timeslice_scale = 0U;
tsg->timeslice_us = g->ops.tsg.default_timeslice_us(g);
tsg->runlist_id = FIFO_INVAL_TSG_ID;
tsg->sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
tsg->gr_ctx = nvgpu_alloc_gr_ctx_struct(g);

View File

@@ -478,7 +478,7 @@ int vgpu_fifo_isr(struct gk20a *g, struct tegra_vgpu_fifo_intr_info *info)
return 0;
}
u32 vgpu_fifo_default_timeslice_us(struct gk20a *g)
u32 vgpu_tsg_default_timeslice_us(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);

View File

@@ -49,7 +49,7 @@ int vgpu_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice);
int vgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose);
u32 vgpu_fifo_default_timeslice_us(struct gk20a *g);
u32 vgpu_tsg_default_timeslice_us(struct gk20a *g);
int vgpu_tsg_open(struct tsg_gk20a *tsg);
void vgpu_tsg_release(struct tsg_gk20a *tsg);
int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg, struct channel_gk20a *ch);

View File

@@ -411,10 +411,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
},
.fifo = {
.init_fifo_setup_hw = vgpu_init_fifo_setup_hw,
.default_timeslice_us = vgpu_fifo_default_timeslice_us,
.preempt_channel = vgpu_fifo_preempt_channel,
.preempt_tsg = vgpu_fifo_preempt_tsg,
.tsg_set_timeslice = vgpu_tsg_set_timeslice,
.is_preempt_pending = NULL,
.reset_enable_hw = NULL,
.recover = NULL,
@@ -562,6 +560,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = vgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = vgpu_tsg_set_timeslice,
.default_timeslice_us = vgpu_tsg_default_timeslice_us,
},
.netlist = {
.get_netlist_name = gp10b_netlist_get_name,

View File

@@ -500,10 +500,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
},
.fifo = {
.init_fifo_setup_hw = vgpu_gv11b_init_fifo_setup_hw,
.default_timeslice_us = vgpu_fifo_default_timeslice_us,
.preempt_channel = vgpu_fifo_preempt_channel,
.preempt_tsg = vgpu_fifo_preempt_tsg,
.tsg_set_timeslice = vgpu_tsg_set_timeslice,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = NULL,
.recover = NULL,
@@ -649,6 +647,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = vgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = vgpu_tsg_set_timeslice,
.default_timeslice_us = vgpu_tsg_default_timeslice_us,
},
.usermode = {
.setup_hw = NULL,

View File

@@ -85,35 +85,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
return 0;
}
u32 gk20a_fifo_default_timeslice_us(struct gk20a *g)
{
u64 slice = (((u64)(NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT <<
NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) *
(u64)g->ptimer_src_freq) /
(u64)PTIMER_REF_FREQ_HZ);
BUG_ON(slice > U64(U32_MAX));
return (u32)slice;
}
int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
{
struct gk20a *g = tsg->g;
if (timeslice < g->min_timeslice_us ||
timeslice > g->max_timeslice_us) {
return -EINVAL;
}
gk20a_channel_get_timescale_from_timeslice(g, timeslice,
&tsg->timeslice_timeout, &tsg->timeslice_scale);
tsg->timeslice_us = timeslice;
return g->ops.runlist.reload(g, tsg->runlist_id, true, true);
}
int gk20a_fifo_suspend(struct gk20a *g)
{
nvgpu_log_fn(g, " ");

View File

@@ -50,9 +50,6 @@ struct tsg_gk20a;
#define RC_YES 1U
#define RC_NO 0U
#define NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT 128UL
#define NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE 3UL
/*
* Number of entries in the kickoff latency buffer, used to calculate
* the profiling and histogram. This number is calculated to be statistically
@@ -219,8 +216,6 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
unsigned long fault_id);
int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
#ifdef CONFIG_DEBUG_FS
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
void gk20a_fifo_profile_release(struct gk20a *g,
@@ -242,8 +237,6 @@ static inline void gk20a_fifo_profile_snapshot(
}
#endif
u32 gk20a_fifo_default_timeslice_us(struct gk20a *g);
int gk20a_fifo_init_pbdma_map(struct gk20a *g, u32 *pbdma_map, u32 num_pbdma);
u32 gk20a_fifo_get_runlist_timeslice(struct gk20a *g);
u32 gk20a_fifo_get_pb_timeslice(struct gk20a *g);

View File

@@ -30,6 +30,7 @@
#include <nvgpu/error_notifier.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/fifo.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma_status.h>

View File

@@ -37,6 +37,9 @@
#define FECS_MAILBOX_0_ACK_RESTORE 0x4U
#define RL_MAX_TIMESLICE_TIMEOUT ram_rl_entry_timeslice_timeout_v(U32_MAX)
#define RL_MAX_TIMESLICE_SCALE ram_rl_entry_timeslice_scale_v(U32_MAX)
int gk20a_runlist_reschedule(struct channel_gk20a *ch, bool preempt_next)
{
return nvgpu_fifo_reschedule_runlist(ch, preempt_next, true);
@@ -142,35 +145,32 @@ u32 gk20a_runlist_length_max(struct gk20a *g)
return fifo_eng_runlist_length_max_v();
}
void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist)
void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg,
u32 *runlist, u32 timeslice)
{
struct gk20a *g = tsg->g;
u32 timeout = timeslice;
u32 scale = 0U;
u32 runlist_entry_0 = ram_rl_entry_id_f(tsg->tsgid) |
ram_rl_entry_type_tsg_f() |
ram_rl_entry_tsg_length_f(tsg->num_active_channels);
WARN_ON(timeslice == 0U);
if (tsg->timeslice_timeout != 0U) {
runlist_entry_0 |=
ram_rl_entry_timeslice_scale_f(tsg->timeslice_scale) |
ram_rl_entry_timeslice_timeout_f(tsg->timeslice_timeout);
} else {
/* safety check before casting */
#if (NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE & 0xffffffff00000000UL)
#error NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE too large for u32 cast
#endif
#if (NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT & 0xffffffff00000000UL)
#error NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT too large for u32 cast
#endif
runlist_entry_0 |=
ram_rl_entry_timeslice_scale_f(
(u32)NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE) |
ram_rl_entry_timeslice_timeout_f(
(u32)NVGPU_FIFO_DEFAULT_TIMESLICE_TIMEOUT);
while (timeout > RL_MAX_TIMESLICE_TIMEOUT) {
timeout >>= 1U;
scale++;
}
runlist[0] = runlist_entry_0;
runlist[1] = 0;
if (scale > RL_MAX_TIMESLICE_SCALE) {
nvgpu_err(g, "requested timeslice value is clamped\n");
timeout = RL_MAX_TIMESLICE_TIMEOUT;
scale = RL_MAX_TIMESLICE_SCALE;
}
runlist[0] = ram_rl_entry_id_f(tsg->tsgid) |
ram_rl_entry_type_tsg_f() |
ram_rl_entry_tsg_length_f(tsg->num_active_channels) |
ram_rl_entry_timeslice_scale_f(scale) |
ram_rl_entry_timeslice_timeout_f(timeout);
runlist[1] = 0;
}
void gk20a_runlist_get_ch_entry(struct channel_gk20a *ch, u32 *runlist)
@@ -265,4 +265,3 @@ void gk20a_runlist_write_state(struct gk20a *g, u32 runlists_mask,
nvgpu_writel(g, fifo_sched_disable_r(), reg_val);
}

View File

@@ -38,7 +38,8 @@ int gk20a_runlist_set_interleave(struct gk20a *g,
u32 gk20a_runlist_count_max(void);
u32 gk20a_runlist_entry_size(struct gk20a *g);
u32 gk20a_runlist_length_max(struct gk20a *g);
void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist);
void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg,
u32 *runlist, u32 timeslice);
void gk20a_runlist_get_ch_entry(struct channel_gk20a *ch, u32 *runlist);
void gk20a_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
u32 count, u32 buffer_index);

View File

@@ -29,6 +29,9 @@
#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
#define RL_MAX_TIMESLICE_TIMEOUT ram_rl_entry_tsg_timeslice_timeout_v(U32_MAX)
#define RL_MAX_TIMESLICE_SCALE ram_rl_entry_tsg_timeslice_scale_v(U32_MAX)
int gv11b_runlist_reschedule(struct channel_gk20a *ch, bool preempt_next)
{
/* gv11b allows multiple outstanding preempts,
@@ -46,24 +49,29 @@ u32 gv11b_runlist_entry_size(struct gk20a *g)
return ram_rl_entry_size_v();
}
void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist)
void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg,
u32 *runlist, u32 timeslice)
{
struct gk20a *g = tsg->g;
u32 runlist_entry_0 = ram_rl_entry_type_tsg_v();
u32 timeout = timeslice;
u32 scale = 0U;
if (tsg->timeslice_timeout != 0U) {
runlist_entry_0 |=
ram_rl_entry_tsg_timeslice_scale_f(tsg->timeslice_scale) |
ram_rl_entry_tsg_timeslice_timeout_f(tsg->timeslice_timeout);
} else {
runlist_entry_0 |=
ram_rl_entry_tsg_timeslice_scale_f(
ram_rl_entry_tsg_timeslice_scale_3_v()) |
ram_rl_entry_tsg_timeslice_timeout_f(
ram_rl_entry_tsg_timeslice_timeout_128_v());
WARN_ON(timeslice == 0U);
while (timeout > RL_MAX_TIMESLICE_TIMEOUT) {
timeout >>= 1U;
scale++;
}
runlist[0] = runlist_entry_0;
if (scale > RL_MAX_TIMESLICE_SCALE) {
nvgpu_err(g, "requested timeslice value is clamped\n");
timeout = RL_MAX_TIMESLICE_TIMEOUT;
scale = RL_MAX_TIMESLICE_SCALE;
}
runlist[0] = ram_rl_entry_type_tsg_v() |
ram_rl_entry_tsg_timeslice_scale_f(scale) |
ram_rl_entry_tsg_timeslice_timeout_f(timeout);
runlist[1] = ram_rl_entry_tsg_length_f(tsg->num_active_channels);
runlist[2] = ram_rl_entry_tsg_tsgid_f(tsg->tsgid);
runlist[3] = 0;

View File

@@ -30,7 +30,8 @@ struct tsg_gk20a;
int gv11b_runlist_reschedule(struct channel_gk20a *ch, bool preempt_next);
u32 gv11b_runlist_count_max(void);
u32 gv11b_runlist_entry_size(struct gk20a *g);
void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist);
void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg,
u32 *runlist, u32 timeslice);
void gv11b_runlist_get_ch_entry(struct channel_gk20a *ch, u32 *runlist);
#endif /* NVGPU_RUNLIST_GV11B_H */

View File

@@ -668,11 +668,9 @@ static const struct gpu_ops gm20b_ops = {
},
.fifo = {
.init_fifo_setup_hw = gk20a_init_fifo_setup_hw,
.default_timeslice_us = gk20a_fifo_default_timeslice_us,
.preempt_channel = gk20a_fifo_preempt_channel,
.preempt_tsg = gk20a_fifo_preempt_tsg,
.preempt_trigger = gk20a_fifo_preempt_trigger,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gk20a_fifo_is_preempt_pending,
.reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
@@ -842,6 +840,8 @@ static const struct gpu_ops gm20b_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = nvgpu_tsg_set_timeslice,
.default_timeslice_us = nvgpu_tsg_default_timeslice_us,
},
.netlist = {
.get_netlist_name = gm20b_netlist_get_name,

View File

@@ -735,11 +735,9 @@ static const struct gpu_ops gp10b_ops = {
},
.fifo = {
.init_fifo_setup_hw = gk20a_init_fifo_setup_hw,
.default_timeslice_us = gk20a_fifo_default_timeslice_us,
.preempt_channel = gk20a_fifo_preempt_channel,
.preempt_tsg = gk20a_fifo_preempt_tsg,
.preempt_trigger = gk20a_fifo_preempt_trigger,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gk20a_fifo_is_preempt_pending,
.reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
@@ -922,6 +920,8 @@ static const struct gpu_ops gp10b_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = nvgpu_tsg_set_timeslice,
.default_timeslice_us = nvgpu_tsg_default_timeslice_us,
},
.netlist = {
.get_netlist_name = gp10b_netlist_get_name,

View File

@@ -915,13 +915,11 @@ static const struct gpu_ops gv100_ops = {
},
.fifo = {
.init_fifo_setup_hw = gv11b_init_fifo_setup_hw,
.default_timeslice_us = gk20a_fifo_default_timeslice_us,
.preempt_channel = gv11b_fifo_preempt_channel,
.preempt_tsg = gv11b_fifo_preempt_tsg,
.preempt_trigger = gv11b_fifo_preempt_trigger,
.preempt_runlists_for_rc = gv11b_fifo_preempt_runlists_for_rc,
.preempt_poll_pbdma = gv11b_fifo_preempt_poll_pbdma,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = gk20a_init_fifo_reset_enable_hw,
@@ -1104,6 +1102,8 @@ static const struct gpu_ops gv100_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = nvgpu_tsg_set_timeslice,
.default_timeslice_us = nvgpu_tsg_default_timeslice_us,
},
.usermode = {
.setup_hw = NULL,

View File

@@ -888,13 +888,11 @@ static const struct gpu_ops gv11b_ops = {
},
.fifo = {
.init_fifo_setup_hw = gv11b_init_fifo_setup_hw,
.default_timeslice_us = gk20a_fifo_default_timeslice_us,
.preempt_channel = gv11b_fifo_preempt_channel,
.preempt_tsg = gv11b_fifo_preempt_tsg,
.preempt_trigger = gv11b_fifo_preempt_trigger,
.preempt_runlists_for_rc = gv11b_fifo_preempt_runlists_for_rc,
.preempt_poll_pbdma = gv11b_fifo_preempt_poll_pbdma,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = gv11b_init_fifo_reset_enable_hw,
@@ -1079,6 +1077,8 @@ static const struct gpu_ops gv11b_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = nvgpu_tsg_set_timeslice,
.default_timeslice_us = nvgpu_tsg_default_timeslice_us,
},
.usermode = {
.setup_hw = NULL,

View File

@@ -949,13 +949,11 @@ static const struct gpu_ops tu104_ops = {
},
.fifo = {
.init_fifo_setup_hw = tu104_init_fifo_setup_hw,
.default_timeslice_us = gk20a_fifo_default_timeslice_us,
.preempt_channel = gv11b_fifo_preempt_channel,
.preempt_tsg = gv11b_fifo_preempt_tsg,
.preempt_trigger = gv11b_fifo_preempt_trigger,
.preempt_runlists_for_rc = gv11b_fifo_preempt_runlists_for_rc,
.preempt_poll_pbdma = gv11b_fifo_preempt_poll_pbdma,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.init_pbdma_map = gk20a_fifo_init_pbdma_map,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = gv11b_init_fifo_reset_enable_hw,
@@ -1140,6 +1138,8 @@ static const struct gpu_ops tu104_ops = {
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = nvgpu_tsg_set_timeslice,
.default_timeslice_us = nvgpu_tsg_default_timeslice_us,
},
.usermode = {
.setup_hw = tu104_usermode_setup_hw,

View File

@@ -995,8 +995,6 @@ struct gpu_ops {
u32 id, unsigned int id_type);
int (*preempt_poll_pbdma)(struct gk20a *g, u32 tsgid,
u32 pbdma_id);
int (*tsg_set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice);
u32 (*default_timeslice_us)(struct gk20a *g);
int (*init_pbdma_map)(struct gk20a *g,
u32 *pbdma_map, u32 num_pbdma);
int (*is_preempt_pending)(struct gk20a *g, u32 id,
@@ -1083,7 +1081,8 @@ struct gpu_ops {
u32 (*count_max)(void);
u32 (*entry_size)(struct gk20a *g);
u32 (*length_max)(struct gk20a *g);
void (*get_tsg_entry)(struct tsg_gk20a *tsg, u32 *runlist);
void (*get_tsg_entry)(struct tsg_gk20a *tsg,
u32 *runlist, u32 timeslice);
void (*get_ch_entry)(struct channel_gk20a *ch, u32 *runlist);
void (*hw_submit)(struct gk20a *g, u32 runlist_id,
u32 count, u32 buffer_index);
@@ -1240,6 +1239,8 @@ struct gpu_ops {
u32 err_code, bool verbose);
void (*post_event_id)(struct tsg_gk20a *tsg,
enum nvgpu_event_id_type event_id);
int (*set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice_us);
u32 (*default_timeslice_us)(struct gk20a *g);
} tsg;
struct {
void (*setup_hw)(struct gk20a *g);
@@ -1991,11 +1992,11 @@ struct gk20a {
struct nvgpu_mutex power_lock;
/* Channel priorities */
u32 timeslice_low_priority_us;
u32 timeslice_medium_priority_us;
u32 timeslice_high_priority_us;
u32 min_timeslice_us;
u32 max_timeslice_us;
u32 tsg_timeslice_low_priority_us;
u32 tsg_timeslice_medium_priority_us;
u32 tsg_timeslice_high_priority_us;
u32 tsg_timeslice_min_us;
u32 tsg_timeslice_max_us;
bool runlist_interleave;
struct nvgpu_mutex cg_pg_lock;

View File

@@ -30,6 +30,13 @@
#define NVGPU_INVALID_TSG_ID (U32_MAX)
#define NVGPU_TSG_TIMESLICE_LOW_PRIORITY_US 1300U
#define NVGPU_TSG_TIMESLICE_MEDIUM_PRIORITY_US 2600U
#define NVGPU_TSG_TIMESLICE_HIGH_PRIORITY_US 5200U
#define NVGPU_TSG_TIMESLICE_MIN_US 1000U
#define NVGPU_TSG_TIMESLICE_MAX_US 50000U
#define NVGPU_TSG_TIMESLICE_DEFAULT_US (128U << 3U)
struct gk20a;
struct channel_gk20a;
struct nvgpu_gr_ctx;
@@ -117,8 +124,9 @@ void nvgpu_tsg_post_event_id(struct tsg_gk20a *tsg,
bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
bool *debug_dump, u32 *ms);
int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level);
int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
u32 gk20a_tsg_get_timeslice(struct tsg_gk20a *tsg);
int nvgpu_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice_us);
u32 nvgpu_tsg_get_timeslice(struct tsg_gk20a *tsg);
u32 nvgpu_tsg_default_timeslice_us(struct gk20a *g);
void nvgpu_tsg_enable_sched(struct gk20a *g, struct tsg_gk20a *tsg);
void nvgpu_tsg_disable_sched(struct gk20a *g, struct tsg_gk20a *tsg);
int gk20a_tsg_set_priority(struct gk20a *g, struct tsg_gk20a *tsg,

View File

@@ -154,6 +154,7 @@ nvgpu_rbtree_enum_start
nvgpu_readl
nvgpu_runlist_construct_locked
nvgpu_rwsem_init
nvgpu_tsg_default_timeslice_us
nvgpu_set_enabled
nvgpu_sgt_alignment
nvgpu_sgt_create_from_mem

View File

@@ -388,21 +388,21 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
g,
&disable_bigpage_fops);
l->debugfs_timeslice_low_priority_us =
debugfs_create_u32("timeslice_low_priority_us",
l->debugfs_tsg_timeslice_low_priority_us =
debugfs_create_u32("tsg_timeslice_low_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_low_priority_us);
l->debugfs_timeslice_medium_priority_us =
debugfs_create_u32("timeslice_medium_priority_us",
&g->tsg_timeslice_low_priority_us);
l->debugfs_tsg_timeslice_medium_priority_us =
debugfs_create_u32("tsg_timeslice_medium_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_medium_priority_us);
l->debugfs_timeslice_high_priority_us =
debugfs_create_u32("timeslice_high_priority_us",
&g->tsg_timeslice_medium_priority_us);
l->debugfs_tsg_timeslice_high_priority_us =
debugfs_create_u32("tsg_timeslice_high_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_high_priority_us);
&g->tsg_timeslice_high_priority_us);
l->debugfs_runlist_interleave =
debugfs_create_bool("runlist_interleave",
S_IRUGO|S_IWUSR,

View File

@@ -30,6 +30,7 @@
#include <nvgpu/sizes.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/regops.h>
#include <nvgpu/tsg.h>
#include <nvgpu/gr/gr.h>
#include "platform_gk20a.h"
@@ -133,12 +134,15 @@ static void nvgpu_init_timeslice(struct gk20a *g)
{
g->runlist_interleave = true;
g->timeslice_low_priority_us = 1300;
g->timeslice_medium_priority_us = 2600;
g->timeslice_high_priority_us = 5200;
g->tsg_timeslice_low_priority_us =
NVGPU_TSG_TIMESLICE_LOW_PRIORITY_US;
g->tsg_timeslice_medium_priority_us =
NVGPU_TSG_TIMESLICE_MEDIUM_PRIORITY_US;
g->tsg_timeslice_high_priority_us =
NVGPU_TSG_TIMESLICE_HIGH_PRIORITY_US;
g->min_timeslice_us = 1000;
g->max_timeslice_us = 50000;
g->tsg_timeslice_min_us = NVGPU_TSG_TIMESLICE_MIN_US;
g->tsg_timeslice_max_us = NVGPU_TSG_TIMESLICE_MAX_US;
}
static void nvgpu_init_pm_vars(struct gk20a *g)

View File

@@ -545,7 +545,7 @@ static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
nvgpu_err(g, "failed to power on gpu");
goto done;
}
err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
err = g->ops.tsg.set_timeslice(tsg, arg->timeslice_us);
gk20a_idle(g);
done:
nvgpu_mutex_release(&sched->control_lock);
@@ -555,7 +555,7 @@ done:
static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
arg->timeslice_us = nvgpu_tsg_get_timeslice(tsg);
return 0;
}

View File

@@ -147,9 +147,9 @@ struct nvgpu_os_linux {
struct dentry *debugfs_poll_timeout_default;
struct dentry *debugfs_disable_bigpage;
struct dentry *debugfs_timeslice_low_priority_us;
struct dentry *debugfs_timeslice_medium_priority_us;
struct dentry *debugfs_timeslice_high_priority_us;
struct dentry *debugfs_tsg_timeslice_low_priority_us;
struct dentry *debugfs_tsg_timeslice_medium_priority_us;
struct dentry *debugfs_tsg_timeslice_high_priority_us;
struct dentry *debugfs_runlist_interleave;
struct dentry *debugfs_allocators;
struct dentry *debugfs_xve;

View File

@@ -241,7 +241,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
if (err)
goto done;
err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
err = g->ops.tsg.set_timeslice(tsg, arg->timeslice);
gk20a_idle(g);

View File

@@ -934,15 +934,15 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
static ssize_t min_timeslice_us_read(struct device *dev,
static ssize_t tsg_timeslice_min_us_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", g->min_timeslice_us);
return snprintf(buf, PAGE_SIZE, "%u\n", g->tsg_timeslice_min_us);
}
static ssize_t min_timeslice_us_store(struct device *dev,
static ssize_t tsg_timeslice_min_us_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct gk20a *g = get_gk20a(dev);
@@ -951,26 +951,26 @@ static ssize_t min_timeslice_us_store(struct device *dev,
if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
if (val > g->max_timeslice_us)
if (val > g->tsg_timeslice_max_us)
return -EINVAL;
g->min_timeslice_us = val;
g->tsg_timeslice_min_us = val;
return count;
}
static DEVICE_ATTR(min_timeslice_us, ROOTRW, min_timeslice_us_read,
min_timeslice_us_store);
static DEVICE_ATTR(tsg_timeslice_min_us, ROOTRW, tsg_timeslice_min_us_read,
tsg_timeslice_min_us_store);
static ssize_t max_timeslice_us_read(struct device *dev,
static ssize_t tsg_timeslice_max_us_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", g->max_timeslice_us);
return snprintf(buf, PAGE_SIZE, "%u\n", g->tsg_timeslice_max_us);
}
static ssize_t max_timeslice_us_store(struct device *dev,
static ssize_t tsg_timeslice_max_us_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct gk20a *g = get_gk20a(dev);
@@ -979,16 +979,16 @@ static ssize_t max_timeslice_us_store(struct device *dev,
if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
if (val < g->min_timeslice_us)
if (val < g->tsg_timeslice_min_us)
return -EINVAL;
g->max_timeslice_us = val;
g->tsg_timeslice_max_us = val;
return count;
}
static DEVICE_ATTR(max_timeslice_us, ROOTRW, max_timeslice_us_read,
max_timeslice_us_store);
static DEVICE_ATTR(tsg_timeslice_max_us, ROOTRW, tsg_timeslice_max_us_read,
tsg_timeslice_max_us_store);
static ssize_t comptag_mem_deduct_store(struct device *dev,
struct device_attribute *attr,
@@ -1053,8 +1053,8 @@ void nvgpu_remove_sysfs(struct device *dev)
device_remove_file(dev, &dev_attr_allow_all);
device_remove_file(dev, &dev_attr_tpc_fs_mask);
device_remove_file(dev, &dev_attr_tpc_pg_mask);
device_remove_file(dev, &dev_attr_min_timeslice_us);
device_remove_file(dev, &dev_attr_max_timeslice_us);
device_remove_file(dev, &dev_attr_tsg_timeslice_min_us);
device_remove_file(dev, &dev_attr_tsg_timeslice_max_us);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
nvgpu_nvhost_remove_symlink(get_gk20a(dev));
@@ -1104,8 +1104,8 @@ int nvgpu_create_sysfs(struct device *dev)
error |= device_create_file(dev, &dev_attr_allow_all);
error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
error |= device_create_file(dev, &dev_attr_min_timeslice_us);
error |= device_create_file(dev, &dev_attr_max_timeslice_us);
error |= device_create_file(dev, &dev_attr_tsg_timeslice_min_us);
error |= device_create_file(dev, &dev_attr_tsg_timeslice_max_us);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
error |= nvgpu_nvhost_create_symlink(g);

View File

@@ -30,6 +30,7 @@
#include <nvgpu/gk20a.h>
#include "hal/fifo/runlist_gk20a.h"
#include "hal/fifo/tsg_gk20a.h"
static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
unsigned long *ch_map, struct tsg_gk20a *tsgs,
@@ -51,6 +52,12 @@ static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
/* to debug, change this to (u64)-1 */
g->log_mask = 0;
/*
* set PTIMER src freq to its nominal frequency to avoid rounding
* errors when scaling timeslice.
*/
g->ptimer_src_freq = 31250000;
f->tsg = tsgs;
f->channel = chs;
f->num_channels = num_channels;
@@ -63,6 +70,7 @@ static void setup_fifo(struct gk20a *g, unsigned long *tsg_map,
f->runlist_entry_size = 2 * sizeof(u32);
g->ops.runlist.get_tsg_entry = gk20a_runlist_get_tsg_entry;
g->ops.runlist.get_ch_entry = gk20a_runlist_get_ch_entry;
g->ops.tsg.default_timeslice_us = nvgpu_tsg_default_timeslice_us;
g->runlist_interleave = interleave;
@@ -123,6 +131,11 @@ static int run_format_test(struct unit_module *m, struct fifo_gk20a *f,
unit_return_fail(m, "number of entries mismatch %d\n", n);
}
if (memcmp(rl_data, expect_header, 2 * sizeof(u32)) != 0) {
unit_err(m, "rl_data[0]=%08x", rl_data[0]);
unit_err(m, "rl_data[1]=%08x", rl_data[1]);
unit_err(m, "expect_header[0]=%08x", expect_header[0]);
unit_err(m, "expect_header[1]=%08x", expect_header[1]);
unit_return_fail(m, "tsg header mismatch\n");
}
if (memcmp(rl_data + 2, expect_channel, 2 * n_ch * sizeof(u32)) != 0) {
@@ -147,7 +160,7 @@ static struct tsg_fmt_test_args {
/* priority 2, five channels */
{ 5, 0x1f, 2, 0, { 0x1600e000, 0 }, { 0, 0, 1, 0, 2, 0, 3, 0, 4, 0 } },
/* priority 0, one channel, nondefault timeslice timeout */
{ 1, 0x01, 0, 0xaa, { 0x06a8e000, 0 }, { 0, 0 } },
{ 1, 0x01, 0, 0xaa<<3, { 0x06a8e000, 0 }, { 0, 0 } },
/* priority 0, three channels with two inactives in the middle */
{ 3, 0x01 | 0x04 | 0x10, 0, 0, { 0x0e00e000, 0 }, { 0, 0, 2, 0, 4, 0 } },
};
@@ -178,8 +191,11 @@ static int test_tsg_format_gen(struct unit_module *m, struct gk20a *g,
active_chs_map = test_args->chs_bitmap;
tsgs[0].timeslice_timeout = test_args->timeslice;
tsgs[0].timeslice_scale = NVGPU_FIFO_DEFAULT_TIMESLICE_SCALE;
if (test_args->timeslice == 0U) {
tsgs[0].timeslice_us = g->ops.tsg.default_timeslice_us(g);
} else {
tsgs[0].timeslice_us = test_args->timeslice;
}
ret = run_format_test(m, f, &tsgs[0], chs, test_args->level,
test_args->channels, rl_data,