diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c index 56cd4e6c1..6eb86d71e 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist.c +++ b/drivers/gpu/nvgpu/common/fifo/runlist.c @@ -302,10 +302,10 @@ static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f, u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f, struct nvgpu_runlist *runlist, - u32 buf_id, + struct nvgpu_runlist_domain *domain, u32 max_entries) { - u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va; + u32 *runlist_entry_base = domain->mem->mem.cpu_va; /* * The entry pointer and capacity counter that live on the stack here @@ -323,6 +323,7 @@ u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f, static bool nvgpu_runlist_modify_active_locked(struct gk20a *g, struct nvgpu_runlist *runlist, + struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch, bool add) { struct nvgpu_tsg *tsg = NULL; @@ -370,29 +371,31 @@ static bool nvgpu_runlist_modify_active_locked(struct gk20a *g, static int nvgpu_runlist_reconstruct_locked(struct gk20a *g, struct nvgpu_runlist *runlist, - u32 buf_id, bool add_entries) + struct nvgpu_runlist_domain *domain, + bool add_entries) { u32 num_entries; struct nvgpu_fifo *f = &g->fifo; rl_dbg(g, "[%u] switch to new buffer 0x%16llx", - runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id])); + runlist->id, (u64)nvgpu_mem_get_addr(g, &domain->mem->mem)); if (!add_entries) { - runlist->count = 0; + domain->mem->count = 0; return 0; } - num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id, + num_entries = nvgpu_runlist_construct_locked(f, runlist, domain, f->num_runlist_entries); if (num_entries == RUNLIST_APPEND_FAILURE) { return -E2BIG; } - runlist->count = num_entries; + + domain->mem->count = num_entries; NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532") NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532") NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532") - WARN_ON(runlist->count > f->num_runlist_entries); + WARN_ON(domain->mem->count > f->num_runlist_entries); NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3)) NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4)) NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6)) @@ -405,11 +408,12 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, bool wait_for_finish) { int ret = 0; - u32 buf_id; bool add_entries; + struct nvgpu_runlist_mem *mem_tmp; + struct nvgpu_runlist_domain *domain = rl->domain; if (ch != NULL) { - bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add); + bool update = nvgpu_runlist_modify_active_locked(g, rl, domain, ch, add); if (!update) { /* no change in runlist contents */ return 0; @@ -421,15 +425,29 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, add_entries = add; } - /* double buffering, swap to next */ - buf_id = (rl->cur_buffer == 0U) ? 1U : 0U; - - ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries); + ret = nvgpu_runlist_reconstruct_locked(g, rl, domain, add_entries); if (ret != 0) { return ret; } - g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id); + /* + * hw_submit updates mem_hw to hardware; swap the buffers now. mem + * becomes the previously scheduled buffer and it can be modified once + * the runlist lock is released. 
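+ *
+ * As an illustration: if the buffer just rebuilt above is A and the
+ * buffer most recently handed to HW is B, then before the swap
+ * mem == A and mem_hw == B. After the swap, mem_hw == A is what
+ * hw_submit programs below, and mem == B is free to be rewritten by
+ * the next update.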
+ */ + + mem_tmp = domain->mem; + domain->mem = domain->mem_hw; + domain->mem_hw = mem_tmp; + + /* + * A non-active domain may be updated, but for simplicity we still + * submit the currently active one. + * + * TODO: Later on, updates and submits will need to be totally + * decoupled so that submits are done only in the domain scheduler. + */ + g->ops.runlist.hw_submit(g, rl); if (wait_for_finish) { ret = g->ops.runlist.wait_pending(g, rl->id); @@ -446,8 +464,6 @@ int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl, } } - rl->cur_buffer = buf_id; - return ret; } @@ -473,8 +489,12 @@ int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next, g, g->pmu, PMU_MUTEX_ID_FIFO, &token); #endif - g->ops.runlist.hw_submit( - g, runlist->id, runlist->count, runlist->cur_buffer); + /* + * Note that the runlist memory is not rewritten; the currently active + * buffer is just resubmitted so that scheduling begins from the first + * entry in it. + */ + g->ops.runlist.hw_submit(g, runlist); if (preempt_next) { if (g->ops.runlist.reschedule_preempt_next_locked(ch, @@ -633,10 +653,16 @@ void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask, #endif } +static void free_rl_mem(struct gk20a *g, struct nvgpu_runlist_mem *mem) +{ + nvgpu_dma_free(g, &mem->mem); + nvgpu_kfree(g, mem); +} + void nvgpu_runlist_cleanup_sw(struct gk20a *g) { struct nvgpu_fifo *f = &g->fifo; - u32 i, j; + u32 i; struct nvgpu_runlist *runlist; if ((f->runlists == NULL) || (f->active_runlists == NULL)) { @@ -647,8 +673,14 @@ void nvgpu_runlist_cleanup_sw(struct gk20a *g) for (i = 0; i < f->num_runlists; i++) { runlist = &f->active_runlists[i]; - for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) { - nvgpu_dma_free(g, &runlist->mem[j]); + + if (runlist->domain != NULL) { + free_rl_mem(g, runlist->domain->mem); + runlist->domain->mem = NULL; + free_rl_mem(g, runlist->domain->mem_hw); + runlist->domain->mem_hw = NULL; + nvgpu_kfree(g, runlist->domain); + runlist->domain = NULL; } nvgpu_kfree(g, runlist->active_channels); @@ -787,13 +819,61 @@ void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f) nvgpu_log_fn(g, "done"); } +static struct nvgpu_runlist_mem *init_rl_mem(struct gk20a *g, u32 runlist_size) +{ + struct nvgpu_runlist_mem *mem = nvgpu_kzalloc(g, sizeof(*mem)); + int err; + + if (mem == NULL) { + return NULL; + } + + err = nvgpu_dma_alloc_flags_sys(g, + g->is_virtual ? 
+ 0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED, + runlist_size, + &mem->mem); + if (err != 0) { + nvgpu_kfree(g, mem); + mem = NULL; + } + + return mem; +} + +static struct nvgpu_runlist_domain *nvgpu_init_rl_domain(struct gk20a *g, u32 runlist_size) +{ + struct nvgpu_runlist_domain *domain = nvgpu_kzalloc(g, sizeof(*domain)); + + if (domain == NULL) { + return NULL; + } + + domain->mem = init_rl_mem(g, runlist_size); + if (domain->mem == NULL) { + goto free_domain; + } + + domain->mem_hw = init_rl_mem(g, runlist_size); + if (domain->mem_hw == NULL) { + goto free_mem; + } + + return domain; +free_mem: + free_rl_mem(g, domain->mem); +free_domain: + nvgpu_kfree(g, domain); + return NULL; +} + static int nvgpu_init_active_runlist_mapping(struct gk20a *g) { struct nvgpu_runlist *runlist; struct nvgpu_fifo *f = &g->fifo; unsigned int runlist_id; size_t runlist_size; - u32 i, j; + u32 i; int err = 0; rl_dbg(g, "Building active runlist map."); @@ -840,26 +920,14 @@ static int nvgpu_init_active_runlist_mapping(struct gk20a *g) rl_dbg(g, " RL entries: %d", f->num_runlist_entries); rl_dbg(g, " RL size %zu", runlist_size); - for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) { - err = nvgpu_dma_alloc_flags_sys(g, - g->is_virtual ? - 0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED, - runlist_size, - &runlist->mem[j]); - if (err != 0) { - nvgpu_err(g, "memory allocation failed"); - err = -ENOMEM; - goto clean_up_runlist; - } + runlist->domain = nvgpu_init_rl_domain(g, runlist_size); + if (runlist->domain == NULL) { + nvgpu_err(g, "memory allocation failed"); + err = -ENOMEM; + goto clean_up_runlist; } nvgpu_mutex_init(&runlist->runlist_lock); - - /* - * None of buffers is pinned if this value doesn't change. - * Otherwise, one of them (cur_buffer) must have been pinned. - */ - runlist->cur_buffer = MAX_RUNLIST_BUFFERS; } return 0; diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c index 30b9a48df..edf563ec5 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c @@ -74,6 +74,7 @@ done: } static bool vgpu_runlist_modify_active_locked(struct gk20a *g, u32 runlist_id, + struct nvgpu_runlist_domain *domain, struct nvgpu_channel *ch, bool add) { struct nvgpu_fifo *f = &g->fifo; @@ -99,6 +100,7 @@ static bool vgpu_runlist_modify_active_locked(struct gk20a *g, u32 runlist_id, } static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, + struct nvgpu_runlist_domain *domain, bool add_entries) { struct nvgpu_fifo *f = &g->fifo; @@ -111,7 +113,7 @@ static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, u32 count = 0; unsigned long chid; - runlist_entry = runlist->mem[0].cpu_va; + runlist_entry = domain->mem->mem.cpu_va; nvgpu_assert(f->num_channels <= (unsigned int)U16_MAX); for_each_set_bit(chid, @@ -121,9 +123,9 @@ static void vgpu_runlist_reconstruct_locked(struct gk20a *g, u32 runlist_id, count++; } - runlist->count = count; + domain->mem->count = count; } else { - runlist->count = 0; + domain->mem->count = 0; } } @@ -132,14 +134,15 @@ static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, bool wait_for_finish) { struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_runlist *runlist; + struct nvgpu_runlist *runlist = f->runlists[runlist_id]; + struct nvgpu_runlist_domain *domain = runlist->domain; bool add_entries; nvgpu_log_fn(g, " "); if (ch != NULL) { bool update = vgpu_runlist_modify_active_locked(g, runlist_id, - ch, add); + domain, ch, add); if (!update) 
{ /* no change in runlist contents */ return 0; @@ -151,12 +154,11 @@ static int vgpu_runlist_update_locked(struct gk20a *g, u32 runlist_id, add_entries = add; } - runlist = f->runlists[runlist_id]; - - vgpu_runlist_reconstruct_locked(g, runlist_id, add_entries); + vgpu_runlist_reconstruct_locked(g, runlist_id, domain, add_entries); return vgpu_submit_runlist(g, vgpu_get_handle(g), runlist_id, - runlist->mem[0].cpu_va, runlist->count); + domain->mem->mem.cpu_va, + domain->mem->count); } /* add/remove a channel from runlist diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100.h b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100.h index 192e8d254..062298722 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100.h @@ -30,6 +30,5 @@ struct gk20a; u32 ga100_runlist_count_max(struct gk20a *g); -void ga100_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index); +void ga100_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist); #endif /* NVGPU_RUNLIST_FIFO_GA100_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100_fusa.c index 196eb9a22..b461c10b6 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga100_fusa.c @@ -38,24 +38,20 @@ u32 ga100_runlist_count_max(struct gk20a *g) return nvgpu_get_litter_value(g, GPU_LIT_MAX_RUNLISTS_SUPPORTED); } -void ga100_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index) +void ga100_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist) { - struct nvgpu_runlist *runlist = NULL; u64 runlist_iova; u32 runlist_iova_lo, runlist_iova_hi; - runlist = g->fifo.runlists[runlist_id]; - - runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]); + runlist_iova = nvgpu_mem_get_addr(g, &runlist->domain->mem_hw->mem); runlist_iova_lo = u64_lo32(runlist_iova) >> runlist_submit_base_lo_ptr_align_shift_v(); runlist_iova_hi = u64_hi32(runlist_iova); - if (count != 0U) { + if (runlist->domain->mem_hw->count != 0U) { nvgpu_runlist_writel(g, runlist, runlist_submit_base_lo_r(), runlist_submit_base_lo_ptr_lo_f(runlist_iova_lo) | - nvgpu_aperture_mask(g, &runlist->mem[buffer_index], + nvgpu_aperture_mask(g, &runlist->domain->mem_hw->mem, runlist_submit_base_lo_target_sys_mem_noncoherent_f(), runlist_submit_base_lo_target_sys_mem_coherent_f(), runlist_submit_base_lo_target_vid_mem_f())); @@ -67,5 +63,5 @@ void ga100_runlist_hw_submit(struct gk20a *g, u32 runlist_id, /* TODO offset in runlist support */ nvgpu_runlist_writel(g, runlist, runlist_submit_r(), runlist_submit_offset_f(0U) | - runlist_submit_length_f(count)); + runlist_submit_length_f(runlist->domain->mem_hw->count)); } diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b.h b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b.h index faf9fe6ca..9de390ff0 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b.h @@ -28,11 +28,11 @@ #include struct gk20a; +struct nvgpu_runlist; u32 ga10b_runlist_count_max(struct gk20a *g); u32 ga10b_runlist_length_max(struct gk20a *g); -void ga10b_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index); +void ga10b_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist); int ga10b_runlist_wait_pending(struct gk20a *g, u32 runlist_id); void ga10b_runlist_write_state(struct gk20a *g, u32 runlists_mask, u32 runlist_state); diff --git 
a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b_fusa.c index a097f7d00..f2081ff52 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_ga10b_fusa.c @@ -48,24 +48,20 @@ u32 ga10b_runlist_length_max(struct gk20a *g) return runlist_submit_length_max_v(); } -void ga10b_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index) +void ga10b_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist) { - struct nvgpu_runlist *runlist = NULL; u64 runlist_iova; u32 runlist_iova_lo, runlist_iova_hi; - runlist = g->fifo.runlists[runlist_id]; - - runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]); + runlist_iova = nvgpu_mem_get_addr(g, &runlist->domain->mem_hw->mem); runlist_iova_lo = u64_lo32(runlist_iova) >> runlist_submit_base_lo_ptr_align_shift_v(); runlist_iova_hi = u64_hi32(runlist_iova); - if (count != 0U) { + if (runlist->domain->mem_hw->count != 0U) { nvgpu_runlist_writel(g, runlist, runlist_submit_base_lo_r(), runlist_submit_base_lo_ptr_lo_f(runlist_iova_lo) | - nvgpu_aperture_mask(g, &runlist->mem[buffer_index], + nvgpu_aperture_mask(g, &runlist->domain->mem_hw->mem, runlist_submit_base_lo_target_sys_mem_noncoherent_f(), runlist_submit_base_lo_target_sys_mem_coherent_f(), runlist_submit_base_lo_target_vid_mem_f())); @@ -77,7 +73,7 @@ void ga10b_runlist_hw_submit(struct gk20a *g, u32 runlist_id, /* TODO offset in runlist support */ nvgpu_runlist_writel(g, runlist, runlist_submit_r(), runlist_submit_offset_f(0U) | - runlist_submit_length_f(count)); + runlist_submit_length_f(runlist->domain->mem_hw->count)); } int ga10b_runlist_wait_pending(struct gk20a *g, u32 runlist_id) diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.h index fc149dc32..9d01fcdb3 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.h @@ -28,6 +28,7 @@ struct nvgpu_channel; struct nvgpu_tsg; struct gk20a; +struct nvgpu_runlist; #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING int gk20a_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next); @@ -39,8 +40,7 @@ u32 gk20a_runlist_count_max(struct gk20a *g); #endif u32 gk20a_runlist_length_max(struct gk20a *g); -void gk20a_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index); +void gk20a_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist); int gk20a_runlist_wait_pending(struct gk20a *g, u32 runlist_id); void gk20a_runlist_write_state(struct gk20a *g, u32 runlists_mask, u32 runlist_state); diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c index ed4ffab54..770f709a7 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a_fusa.c @@ -38,29 +38,24 @@ u32 gk20a_runlist_length_max(struct gk20a *g) return fifo_eng_runlist_length_max_v(); } -void gk20a_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index) +void gk20a_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist) { - struct nvgpu_runlist *runlist = NULL; - u64 runlist_iova; + u64 runlist_iova = nvgpu_mem_get_addr(g, &runlist->domain->mem_hw->mem); - runlist = g->fifo.runlists[runlist_id]; - runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]); + nvgpu_spinlock_acquire(&g->fifo.runlist_submit_lock); - 
nvgpu_spinlock_acquire(&g->fifo.runlist_submit_lock); + if (runlist->domain->mem_hw->count != 0U) { + nvgpu_writel(g, fifo_runlist_base_r(), + fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12U)) | + nvgpu_aperture_mask(g, &runlist->domain->mem_hw->mem, + fifo_runlist_base_target_sys_mem_ncoh_f(), + fifo_runlist_base_target_sys_mem_coh_f(), + fifo_runlist_base_target_vid_mem_f())); + } - if (count != 0U) { - nvgpu_writel(g, fifo_runlist_base_r(), - fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12U)) | - nvgpu_aperture_mask(g, &runlist->mem[buffer_index], - fifo_runlist_base_target_sys_mem_ncoh_f(), - fifo_runlist_base_target_sys_mem_coh_f(), - fifo_runlist_base_target_vid_mem_f())); - } - - nvgpu_writel(g, fifo_runlist_r(), - fifo_runlist_engine_f(runlist_id) | - fifo_eng_runlist_length_f(count)); + nvgpu_writel(g, fifo_runlist_r(), + fifo_runlist_engine_f(runlist->id) | + fifo_eng_runlist_length_f(runlist->domain->mem_hw->count)); nvgpu_spinlock_release(&g->fifo.runlist_submit_lock); } diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.c index a167749a3..ec23916ab 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.c +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.c @@ -34,34 +34,31 @@ u32 tu104_runlist_count_max(struct gk20a *g) return fifo_runlist_base_lo__size_1_v(); } -void tu104_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index) +void tu104_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist) { - struct nvgpu_runlist *runlist = NULL; u64 runlist_iova; u32 runlist_iova_lo, runlist_iova_hi; - runlist = g->fifo.runlists[runlist_id]; - runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[buffer_index]); + runlist_iova = nvgpu_mem_get_addr(g, &runlist->domain->mem_hw->mem); runlist_iova_lo = u64_lo32(runlist_iova) >> fifo_runlist_base_lo_ptr_align_shift_v(); runlist_iova_hi = u64_hi32(runlist_iova); - if (count != 0U) { - nvgpu_writel(g, fifo_runlist_base_lo_r(runlist_id), + if (runlist->domain->mem_hw->count != 0U) { + nvgpu_writel(g, fifo_runlist_base_lo_r(runlist->id), fifo_runlist_base_lo_ptr_lo_f(runlist_iova_lo) | - nvgpu_aperture_mask(g, &runlist->mem[buffer_index], + nvgpu_aperture_mask(g, &runlist->domain->mem_hw->mem, fifo_runlist_base_lo_target_sys_mem_ncoh_f(), fifo_runlist_base_lo_target_sys_mem_coh_f(), fifo_runlist_base_lo_target_vid_mem_f())); - nvgpu_writel(g, fifo_runlist_base_hi_r(runlist_id), + nvgpu_writel(g, fifo_runlist_base_hi_r(runlist->id), fifo_runlist_base_hi_ptr_hi_f(runlist_iova_hi)); } - nvgpu_writel(g, fifo_runlist_submit_r(runlist_id), - fifo_runlist_submit_length_f(count)); + nvgpu_writel(g, fifo_runlist_submit_r(runlist->id), + fifo_runlist_submit_length_f(runlist->domain->mem_hw->count)); } int tu104_runlist_wait_pending(struct gk20a *g, u32 runlist_id) diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.h b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.h index 595934391..773ff440f 100644 --- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.h +++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_tu104.h @@ -26,10 +26,10 @@ #include struct gk20a; +struct nvgpu_runlist; u32 tu104_runlist_count_max(struct gk20a *g); -void tu104_runlist_hw_submit(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index); +void tu104_runlist_hw_submit(struct gk20a *g, struct nvgpu_runlist *runlist); int tu104_runlist_wait_pending(struct gk20a *g, u32 runlist_id); #endif /* NVGPU_RUNLIST_FIFO_TU104_H */ diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h index f3c7ab169..b22e9eb32 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/runlist.h @@ -32,6 +32,7 @@ */ struct gk20a; struct nvgpu_channel; +struct nvgpu_runlist; /** * Runlist HAL operations. */ struct gops_runlist { void (*get_tsg_entry)(struct nvgpu_tsg *tsg, u32 *runlist, u32 timeslice); void (*get_ch_entry)(struct nvgpu_channel *ch, u32 *runlist); - void (*hw_submit)(struct gk20a *g, u32 runlist_id, - u32 count, u32 buffer_index); + void (*hw_submit)(struct gk20a *g, struct nvgpu_runlist *runlist); int (*wait_pending)(struct gk20a *g, u32 runlist_id); void (*write_state)(struct gk20a *g, u32 runlists_mask, u32 runlist_state); diff --git a/drivers/gpu/nvgpu/include/nvgpu/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/runlist.h index d296a6f6a..ecd857d97 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/runlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/runlist.h @@ -75,31 +75,50 @@ struct nvgpu_pbdma_info; /** Enable runlist. */ #define RUNLIST_ENABLED 1U -/** Double buffering is used to build runlists */ -#define MAX_RUNLIST_BUFFERS 2U - /** Runlist identifier is invalid. */ #define NVGPU_INVALID_RUNLIST_ID U32_MAX +/* + * Updates to this memory are still serialized by the runlist lock. + * + * TODO: add a mutex when domain updates get more fine-grained. The buffers in + * nvgpu_runlist_domain are deliberately pointer members to make this easier in + * the future; the buffers may get juggled around. + */ +struct nvgpu_runlist_mem { + /** Rendered runlist memory suitable for HW. */ + struct nvgpu_mem mem; + + /** Number of entries written in the buffer. */ + u32 count; +}; + +struct nvgpu_runlist_domain { + /** Runlist buffer that SW is free to modify. Swapped with mem_hw on the next load. */ + struct nvgpu_runlist_mem *mem; + + /** Currently active buffer, as submitted to hardware. */ + struct nvgpu_runlist_mem *mem_hw; +}; + struct nvgpu_runlist { - /** Runlist identifier. */ + /** Runlist identifier. The HW has designated RL IDs that are bound to engines. */ u32 id; + /** Bitmap of active channels in the runlist. One bit per chid. */ unsigned long *active_channels; /** Bitmap of active TSGs in the runlist. One bit per tsgid. */ unsigned long *active_tsgs; - /** Runlist buffers. Double buffering is used for each engine. */ - struct nvgpu_mem mem[MAX_RUNLIST_BUFFERS]; - /** Indicates current runlist buffer used by HW. */ - u32 cur_buffer; + + /* The default domain is the only one that currently exists. */ + struct nvgpu_runlist_domain *domain; + /** Bitmask of PBDMAs supported for this runlist. */ u32 pbdma_bitmask; /** Bitmask of engines using this runlist. */ u32 eng_bitmask; /** Bitmask of engines to be reset during recovery. */ u32 reset_eng_bitmask; - /** Cached hw_submit parameter. */ - u32 count; /** Protect ch/tsg/runlist preempt & runlist update. 
*/ struct nvgpu_mutex runlist_lock; @@ -139,7 +158,8 @@ struct nvgpu_runlist { */ u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f, struct nvgpu_runlist *runlist, - u32 buf_id, u32 max_entries); + struct nvgpu_runlist_domain *domain, + u32 max_entries); /** * @brief Add/remove channel to/from runlist (locked) diff --git a/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c b/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c index 0eab01cf2..69904003c 100644 --- a/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c +++ b/userspace/units/fifo/runlist/gk20a/nvgpu-runlist-gk20a.c @@ -75,17 +75,19 @@ done: int test_gk20a_runlist_hw_submit(struct unit_module *m, struct gk20a *g, void *args) { + struct nvgpu_fifo *f = &g->fifo; int ret = UNIT_FAIL; u32 runlist_id = nvgpu_engine_get_gr_runlist_id(g); u32 count; - u32 buffer_index = 0; for (count = 0; count < 2; count++) { nvgpu_writel(g, fifo_runlist_r(), 0); nvgpu_writel(g, fifo_runlist_base_r(), 0); - gk20a_runlist_hw_submit(g, runlist_id, count, buffer_index); + f->runlists[runlist_id]->domain->mem_hw->count = count; + + gk20a_runlist_hw_submit(g, f->runlists[runlist_id]); if (count == 0) { unit_assert(nvgpu_readl(g, fifo_runlist_base_r()) == 0, goto done);
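For reference, the rebuild/swap/submit flow that this patch introduces can be sketched outside the driver as a small self-contained C program. The mem/mem_hw roles and the order of operations mirror nvgpu_runlist_update_locked() and hw_submit() above, but everything else is an illustrative assumption: the demo_* names, the fixed-size calloc'd entry arrays, and the printf stand-in for the real hardware submit are not nvgpu APIs.

/* Minimal sketch (assumed names/types) of the mem/mem_hw double buffering. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_runlist_mem {
	uint32_t *entries;	/* stands in for nvgpu_runlist_mem::mem (cpu_va) */
	uint32_t count;		/* number of valid entries, as in the patch */
};

struct demo_runlist_domain {
	struct demo_runlist_mem *mem;		/* SW-owned buffer, free to rewrite */
	struct demo_runlist_mem *mem_hw;	/* buffer last handed to "HW" */
};

/* Rebuild the SW buffer; stands in for nvgpu_runlist_reconstruct_locked(). */
static void demo_reconstruct(struct demo_runlist_domain *d, uint32_t n)
{
	for (uint32_t i = 0; i < n; i++)
		d->mem->entries[i] = i;		/* fake runlist entries */
	d->mem->count = n;
}

/* Stands in for g->ops.runlist.hw_submit(): reads mem_hw only. */
static void demo_hw_submit(const struct demo_runlist_domain *d)
{
	printf("submit %u entries from buffer %p\n",
	       (unsigned)d->mem_hw->count, (void *)d->mem_hw->entries);
}

/* Mirrors the rebuild -> swap -> submit order of nvgpu_runlist_update_locked(). */
static void demo_update(struct demo_runlist_domain *d, uint32_t n)
{
	struct demo_runlist_mem *tmp;

	demo_reconstruct(d, n);

	tmp = d->mem;		/* freshly built buffer becomes the HW buffer */
	d->mem = d->mem_hw;	/* previously submitted buffer becomes writable */
	d->mem_hw = tmp;

	demo_hw_submit(d);
}

int main(void)
{
	/* Error checking omitted for brevity. */
	struct demo_runlist_mem a = { calloc(8, sizeof(uint32_t)), 0 };
	struct demo_runlist_mem b = { calloc(8, sizeof(uint32_t)), 0 };
	struct demo_runlist_domain d = { &a, &b };

	demo_update(&d, 3);	/* first submit is served from buffer 'a' */
	demo_update(&d, 5);	/* second submit is served from buffer 'b' */

	free(a.entries);
	free(b.entries);
	return 0;
}

The point the sketch makes is the one the patch relies on: hw_submit() only ever dereferences mem_hw, so the buffer that rotates back into mem can be rewritten for the next update without touching what the hardware was last given.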