diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2c2850c63..6eecebf59 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -177,7 +177,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 }
 
 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
-				u32 timeslice_period, bool interleave)
+				u32 timeslice_period)
 {
 	void *inst_ptr;
 	int shift = 0, value = 0;
@@ -205,30 +205,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	if (c->interleave != interleave) {
-		mutex_lock(&c->g->interleave_lock);
-		c->interleave = interleave;
-		if (interleave)
-			if (c->g->num_interleaved_channels >=
-					MAX_INTERLEAVED_CHANNELS) {
-				gk20a_err(dev_from_gk20a(c->g),
-					"Change of priority would exceed runlist length, only changing timeslice\n");
-				c->interleave = false;
-			} else
-				c->g->num_interleaved_channels += 1;
-		else
-			c->g->num_interleaved_channels -= 1;
-
-		mutex_unlock(&c->g->interleave_lock);
-		gk20a_dbg_info("Set channel %d to interleave %d",
-			c->hw_chid, c->interleave);
-
-		gk20a_fifo_set_channel_priority(
-			c->g, 0, c->hw_chid, c->interleave);
-		c->g->ops.fifo.update_runlist(
-			c->g, 0, ~0, true, false);
-	}
-
 	return 0;
 }
 
@@ -711,6 +687,32 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 	return 0;
 }
 
+static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+						u32 level)
+{
+	struct gk20a *g = ch->g;
+	int ret;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	switch (level) {
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
+		ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
+							false, 0, level);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+}
+
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		struct nvgpu_set_error_notifier *args)
 {
@@ -899,17 +901,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	}
 	mutex_unlock(&f->deferred_reset_mutex);
 
-	if (ch->interleave) {
-		ch->interleave = false;
-		gk20a_fifo_set_channel_priority(
-			ch->g, 0, ch->hw_chid, ch->interleave);
-
-		mutex_lock(&f->g->interleave_lock);
-		WARN_ON(f->g->num_interleaved_channels == 0);
-		f->g->num_interleaved_channels -= 1;
-		mutex_unlock(&f->g->interleave_lock);
-	}
-
 	if (!ch->bound)
 		goto release;
 
@@ -1154,11 +1145,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->interleave = false;
 	ch->clean_up.scheduled = false;
-	gk20a_fifo_set_channel_priority(
-		ch->g, 0, ch->hw_chid, ch->interleave);
-
+	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	/* The channel is *not* runnable at this point. It still needs to have
 	 * an address space bound and allocate a gpfifo and grctx. */
@@ -2613,7 +2601,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
 int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 {
 	u32 timeslice_timeout;
-	bool interleave = false;
 
 	if (gk20a_is_channel_marked_as_tsg(ch)) {
 		gk20a_err(dev_from_gk20a(ch->g),
@@ -2630,8 +2617,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 		timeslice_timeout = ch->g->timeslice_medium_priority_us;
 		break;
 	case NVGPU_PRIORITY_HIGH:
-		if (ch->g->interleave_high_priority)
-			interleave = true;
 		timeslice_timeout = ch->g->timeslice_high_priority_us;
 		break;
 	default:
@@ -2640,7 +2625,7 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 	}
 
 	return channel_gk20a_set_schedule_params(ch,
-			timeslice_timeout, interleave);
+			timeslice_timeout);
 }
 
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
@@ -3045,6 +3030,18 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_channel_set_wdt_status(ch,
 				(struct nvgpu_channel_wdt_args *)buf);
 		break;
+	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_busy(dev);
+		if (err) {
+			dev_err(&dev->dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_set_runlist_interleave(ch,
+			((struct nvgpu_runlist_interleave_args *)buf)->level);
+		gk20a_idle(dev);
+		break;
 	default:
 		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4aea9d19a..3f5a657ac 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -188,8 +188,7 @@ struct channel_gk20a {
 	spinlock_t update_fn_lock; /* make access to the two above atomic */
 	struct work_struct update_fn_work;
 
-	/* true if channel is interleaved with lower priority channels */
-	bool interleave;
+	u32 interleave_level;
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af2..28cc3086a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_tsgs)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
 	kfree(runlist->active_tsgs);
 	runlist->active_tsgs = NULL;
 
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(f->runlist_info);
 	f->runlist_info = NULL;
 
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
 	return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-	struct fifo_gk20a *f,
-	struct fifo_runlist_info_gk20a *runlist,
-	u32 *runlist_entry)
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+				struct fifo_runlist_info_gk20a *runlist,
+				u32 cur_level,
+				u32 *runlist_entry,
+				bool interleave_enabled,
+				bool prev_empty,
+				u32 *entries_left)
 {
-	struct channel_gk20a *ch = NULL;
-	unsigned long high_prio_chid;
-	u32 count = 0;
+	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	struct channel_gk20a *ch;
+	bool skip_next = false;
+	u32 chid, tsgid, count = 0;
 
-	for_each_set_bit(high_prio_chid,
-			runlist->high_prio_channels, f->num_channels) {
-		ch = &f->channel[high_prio_chid];
+	gk20a_dbg_fn("");
 
-		if (!gk20a_is_channel_marked_as_tsg(ch) &&
-			test_bit(high_prio_chid, runlist->active_channels) == 1) {
-			gk20a_dbg_info("add high prio channel %lu to runlist",
-				high_prio_chid);
-			runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
+	/* for each bare channel, CH, on this level, insert all higher-level
+	   channels and TSGs before inserting CH. */
+	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+		ch = &f->channel[chid];
+
+		if (ch->interleave_level != cur_level)
+			continue;
+
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			/* if interleaving is disabled, higher-level channels
+			   and TSGs only need to be inserted once */
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		gk20a_dbg_info("add channel %d to runlist", chid);
+		runlist_entry[0] = ram_rl_entry_chid_f(chid);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+	}
+
+	/* for each TSG, T, on this level, insert all higher-level channels
+	   and TSGs before inserting T. */
+	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		if (tsg->interleave_level != cur_level)
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		/* add TSG entry */
+		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+		runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+
+		mutex_lock(&tsg->ch_list_lock);
+		/* add runnable channels bound to this TSG */
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+			if (!test_bit(ch->hw_chid,
+				      runlist->active_channels))
+				continue;
+
+			if (!(*entries_left)) {
+				mutex_unlock(&tsg->ch_list_lock);
+				return NULL;
+			}
+
+			gk20a_dbg_info("add channel %d to runlist",
+				ch->hw_chid);
+			runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
 			runlist_entry[1] = 0;
 			runlist_entry += 2;
 			count++;
+			(*entries_left)--;
 		}
+		mutex_unlock(&tsg->ch_list_lock);
 	}
 
-	return count;
+	/* append entries from higher level if this level is empty */
+	if (!count && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					true,
+					entries_left);
+
+	/*
+	 * if previous and this level have entries, append
+	 * entries from higher level.
+	 *
+	 * ex. dropping from MEDIUM to LOW, need to insert HIGH
+	 */
+	if (interleave_enabled && count && !prev_empty && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					false,
+					entries_left);
+	return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level)
+{
+	gk20a_dbg_fn("");
+
+	if (is_tsg)
+		g->fifo.tsg[id].interleave_level = new_level;
+	else
+		g->fifo.channel[id].interleave_level = new_level;
+
+	return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
-	u32 *runlist_entry = NULL;
 	u64 runlist_iova;
 	u32 old_buf, new_buf;
-	u32 chid, tsgid;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	u32 count = 0;
-	u32 count_channels_in_tsg;
 	runlist = &f->runlist_info[runlist_id];
 
 	/* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 	if (hw_chid != ~0 || /* add/remove a valid channel */
 	    add /* resume to add all channels back */) {
-		runlist_entry = runlist_entry_base;
+		u32 max_entries = f->num_runlist_entries;
+		u32 *runlist_end;
 
-		/* Runlist manipulation:
-		   Insert an entry of all high priority channels inbetween
-		   all lower priority channels. This ensure that the maximum
-		   delay a runnable high priority channel has to wait is one
-		   medium timeslice + any context switching overhead +
-		   wait on other high priority channels.
-		   add non-TSG channels first */
-		for_each_set_bit(chid,
-				runlist->active_channels, f->num_channels) {
-			ch = &f->channel[chid];
-
-			if (!gk20a_is_channel_marked_as_tsg(ch) &&
-					!ch->interleave) {
-				u32 added;
-
-				gk20a_dbg_info("add normal prio channel %d to runlist",
-					chid);
-				runlist_entry[0] = ram_rl_entry_chid_f(chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-
-				added = gk20a_fifo_runlist_add_high_prio_entries(
-						f,
+		runlist_end = gk20a_runlist_construct_locked(f,
 						runlist,
-						runlist_entry);
-				count += added;
-				runlist_entry += 2 * added;
-			}
+						0,
+						runlist_entry_base,
+						g->runlist_interleave,
+						true,
+						&max_entries);
+		if (!runlist_end) {
+			ret = -E2BIG;
+			goto clean_up;
 		}
 
-		/* if there were no lower priority channels, then just
-		 * add the high priority channels once. */
-		if (count == 0) {
-			count = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			runlist_entry += 2 * count;
-		}
-
-		/* now add TSG entries and channels bound to TSG */
-		mutex_lock(&f->tsg_inuse_mutex);
-		for_each_set_bit(tsgid,
-				runlist->active_tsgs, f->num_channels) {
-			u32 added;
-			tsg = &f->tsg[tsgid];
-			/* add TSG entry */
-			gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-			runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-
-			/* add runnable channels bound to this TSG */
-			count_channels_in_tsg = 0;
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-				if (!test_bit(ch->hw_chid,
-					runlist->active_channels))
-					continue;
-				gk20a_dbg_info("add channel %d to runlist",
-					ch->hw_chid);
-				runlist_entry[0] =
-					ram_rl_entry_chid_f(ch->hw_chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-				count_channels_in_tsg++;
-			}
-			mutex_unlock(&tsg->ch_list_lock);
-
-			WARN_ON(tsg->num_active_channels !=
-				count_channels_in_tsg);
-
-			added = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			count += added;
-			runlist_entry += 2 * added;
-		}
-		mutex_unlock(&f->tsg_inuse_mutex);
+		count = (runlist_end - runlist_entry_base) / 2;
+		WARN_ON(count > f->num_runlist_entries);
 	} else	/* suspend to remove all channels */
 		count = 0;
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
 	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-	struct gk20a *g,
-	u32 runlist_id,
-	u32 hw_chid,
-	bool interleave)
-{
-	struct fifo_runlist_info_gk20a *runlist = NULL;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = NULL;
-
-	if (hw_chid >= f->num_channels)
-		return -EINVAL;
-
-	if (runlist_id >= f->max_runlists)
-		return -EINVAL;
-
-	ch = &f->channel[hw_chid];
-
-	gk20a_dbg_fn("");
-
-	runlist = &f->runlist_info[runlist_id];
-
-	mutex_lock(&runlist->mutex);
-
-	if (ch->interleave)
-		set_bit(hw_chid, runlist->high_prio_channels);
-	else
-		clear_bit(hw_chid, runlist->high_prio_channels);
-
-	gk20a_dbg_fn("done");
-
-	mutex_unlock(&runlist->mutex);
-
-	return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid)
 {
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index ee4e7328e..0979bf2b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -31,7 +31,6 @@ struct fifo_runlist_info_gk20a {
 	unsigned long *active_channels;
 	unsigned long *active_tsgs;
-	unsigned long *high_prio_channels;
 	/* Each engine has its own SW and HW runlist buffer.*/
 	struct mem_desc mem[MAX_RUNLIST_BUFFERS];
 	u32 cur_buffer;
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
 int gk20a_fifo_wait_engine_idle(struct gk20a *g);
 u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
 u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
-int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
-		u32 hw_chid, bool interleave);
 u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
 		int *__id, bool *__is_tsg);
 bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid);
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
 
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa2c61e19..0fee58e85 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -672,9 +672,6 @@ static int gk20a_init_support(struct platform_device *dev)
 	mutex_init(&g->ch_wdt_lock);
 	mutex_init(&g->poweroff_lock);
 
-	mutex_init(&g->interleave_lock);
-	g->num_interleaved_channels = 0;
-
 	g->remove_support = gk20a_remove_support;
 	return 0;
 
@@ -1439,14 +1436,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (tegra_platform_is_silicon())
 		gk20a->timeouts_enabled = true;
 
-	gk20a->interleave_high_priority = true;
+	gk20a->runlist_interleave = true;
 
 	gk20a->timeslice_low_priority_us = 1300;
 	gk20a->timeslice_medium_priority_us = 2600;
-	if (gk20a->interleave_high_priority)
-		gk20a->timeslice_high_priority_us = 3000;
-	else
-		gk20a->timeslice_high_priority_us = 5200;
+	gk20a->timeslice_high_priority_us = 5200;
 
 	/* Set up initial power settings. For non-slicon platforms, disable *
 	 * power features and for silicon platforms, read from platform data */
@@ -1527,11 +1521,11 @@ static int gk20a_probe(struct platform_device *dev)
 				platform->debugfs,
 				&gk20a->timeslice_high_priority_us);
 
-	gk20a->debugfs_interleave_high_priority =
-		debugfs_create_bool("interleave_high_priority",
+	gk20a->debugfs_runlist_interleave =
+		debugfs_create_bool("runlist_interleave",
 				S_IRUGO|S_IWUSR,
 				platform->debugfs,
-				&gk20a->interleave_high_priority);
+				&gk20a->runlist_interleave);
 
 	gr_gk20a_debugfs_init(gk20a);
 	gk20a_pmu_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index afdbeef7e..faccf04a4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,8 +54,6 @@ struct acr_gm20b;
    32 ns is the resolution of ptimer. */
 #define PTIMER_REF_FREQ_HZ 31250000
 
-#define MAX_INTERLEAVED_CHANNELS 32
-
 struct cooling_device_gk20a {
 	struct thermal_cooling_device *gk20a_cooling_dev;
 	unsigned int gk20a_freq_state;
@@ -268,6 +266,9 @@ struct gpu_ops {
 		u32 (*get_num_fifos)(struct gk20a *g);
 		u32 (*get_pbdma_signature)(struct gk20a *g);
 		int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority);
+		int (*set_runlist_interleave)(struct gk20a *g, u32 id,
+					bool is_tsg, u32 runlist_id,
+					u32 new_level);
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -536,10 +537,7 @@ struct gk20a {
 	u32 timeslice_low_priority_us;
 	u32 timeslice_medium_priority_us;
 	u32 timeslice_high_priority_us;
-	u32 interleave_high_priority;
-
-	struct mutex interleave_lock;
-	u32 num_interleaved_channels;
+	u32 runlist_interleave;
 
 	bool slcg_enabled;
 	bool blcg_enabled;
@@ -564,7 +562,7 @@ struct gk20a {
 	struct dentry *debugfs_timeslice_low_priority_us;
 	struct dentry *debugfs_timeslice_medium_priority_us;
 	struct dentry *debugfs_timeslice_high_priority_us;
-	struct dentry *debugfs_interleave_high_priority;
+	struct dentry *debugfs_runlist_interleave;
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 4421744cc..b41cca08c 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	tsg->tsg_gr_ctx = NULL;
 	tsg->vm = NULL;
+	tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	filp->private_data = tsg;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index bcc4d0c40..7e0a75d14 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -49,6 +49,8 @@ struct tsg_gk20a {
 	struct gr_ctx_desc *tsg_gr_ctx;
 
 	struct vm_gk20a *vm;
+
+	u32 interleave_level;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index d1deffb93..3fded03c2 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -121,4 +121,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index e776e97cb..b4bb7f387 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -194,12 +194,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_channels)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = sizeof(u16) * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -222,9 +216,6 @@ clean_up_runlist:
 		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 clean_up_runlist_info:
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 442a84acb..0787d4e42 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -834,6 +834,28 @@ struct nvgpu_channel_wdt_args {
 #define NVGPU_IOCTL_CHANNEL_DISABLE_WDT 1
 #define NVGPU_IOCTL_CHANNEL_ENABLE_WDT 2
 
+/*
+ * Interleaving channels in a runlist is an approach to improve
+ * GPU scheduling by allowing certain channels to appear multiple
+ * times on the runlist. The number of times a channel appears is
+ * governed by the following levels:
+ *
+ * low (L)   : appears once
+ * medium (M): if L, appears L times
+ *             else, appears once
+ * high (H)  : if L, appears (M + 1) x L times
+ *             else if M, appears M times
+ *             else, appears once
+ */
+struct nvgpu_runlist_interleave_args {
+	__u32 level;
+	__u32 reserved;
+};
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW	0
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM	1
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH	2
+#define NVGPU_RUNLIST_INTERLEAVE_NUM_LEVELS	3
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -876,9 +898,11 @@ struct nvgpu_channel_wdt_args {
 	_IOWR(NVGPU_IOCTL_MAGIC, 118, struct nvgpu_cycle_stats_snapshot_args)
 #define NVGPU_IOCTL_CHANNEL_WDT \
 	_IOW(NVGPU_IOCTL_MAGIC, 119, struct nvgpu_channel_wdt_args)
+#define NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE \
+	_IOW(NVGPU_IOCTL_MAGIC, 120, struct nvgpu_runlist_interleave_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST \
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_WDT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE)
 
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_submit_gpfifo_args)
 
 /*
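
As a worked check of the level rules documented in the uapi comment above:
with interleaving enabled, two active bare LOW channels (l0, l1), one MEDIUM
channel (m0) and one HIGH channel (h0), tracing gk20a_runlist_construct_locked()
from the LOW level yields the entry order

    h0 m0 h0 l0 h0 m0 h0 l1

so h0 appears (M + 1) x L = (1 + 1) x 2 = 4 times and m0 appears L = 2 times,
matching the comment. The channel names are illustrative only; the pattern
follows from the recursion (each bare channel or TSG on a level first inserts
all higher-level entries, and a non-empty level appends the higher levels once
more before returning).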
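From user space, the new ioctl is driven roughly as in the sketch below. This
is a minimal illustration, not part of the patch: it assumes ch_fd is an
already-open nvgpu channel file descriptor (how the channel is opened is
platform-specific and not shown) and that the updated <linux/nvgpu.h> is on
the include path; every other identifier comes from the uapi hunk above.

/* Hedged usage sketch: set a channel's runlist interleave level.
 * ch_fd is assumed to be an open nvgpu channel fd. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nvgpu.h>

static int set_runlist_interleave(int ch_fd, __u32 level)
{
	struct nvgpu_runlist_interleave_args args;

	memset(&args, 0, sizeof(args));	/* zero the reserved field */
	args.level = level;		/* LOW, MEDIUM or HIGH */

	if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE, &args) < 0) {
		perror("NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE");
		return -1;
	}
	return 0;
}

/* e.g. set_runlist_interleave(ch_fd, NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH); */

Per the kernel side above, the call fails with EINVAL for TSG-bound channels
or out-of-range levels, and a successful call immediately rewrites the runlist
via g->ops.fifo.update_runlist().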