Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Prune redundant cache maintenance
Remove redundant cache maintenance operations. Instance blocks and
graphics context buffers are uncached, so they do not need any cache
maintenance.

Bug 1421824

Change-Id: Ie0be67bf0be493d9ec9e6f8226f2f9359cba9f54
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/406948
commit 48239f5f8c
parent 3e5c123862
committed by Dan Willemsen
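For readers skimming the diff: the change mechanically deletes the
gk20a_mm_l2_flush()/gk20a_mm_l2_invalidate() calls that bracketed CPU
accesses to these buffers. A minimal sketch of the before/after pattern,
assembled from identifiers that appear in the hunks below rather than
quoted verbatim from the tree:

	/* Before: a CPU write to the instance block was followed by a GPU
	 * L2 invalidate.  Per the commit message the buffer is uncached,
	 * so the write already reaches memory and the maintenance operation
	 * does no useful work. */
	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
	gk20a_mm_l2_invalidate(c->g);	/* pruned by this commit */

	/* After: the uncached write alone is sufficient. */
	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));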
@@ -128,8 +128,6 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -159,8 +157,6 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
 		pbdma_userd_target_vid_mem_f() |
 		pbdma_userd_hi_addr_f(addr_hi));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -183,9 +179,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 	/* preempt the channel */
 	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
 
-	/* flush GPU cache */
-	gk20a_mm_l2_flush(c->g, true);
-
 	/* value field is 8 bits long */
 	while (value >= 1 << 8) {
 		value >>= 1;
@@ -209,8 +202,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -277,8 +268,6 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -299,8 +288,6 @@ static int channel_gk20a_setup_userd(struct channel_gk20a *c)
 	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
 	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -649,8 +636,6 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	ch->gpfifo.cpu_va = NULL;
 	ch->gpfifo.iova = 0;
 
-	gk20a_mm_l2_invalidate(ch->g);
-
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1155,8 +1140,6 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	channel_gk20a_setup_userd(c);
 	channel_gk20a_commit_userd(c);
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	/* TBD: setup engine contexts */
 
 	err = channel_gk20a_alloc_priv_cmdbuf(c);
@@ -538,9 +538,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 	gk20a_dbg_fn("");
 
-	/* flush gpu_va before commit */
 	gk20a_mm_fb_flush(c->g);
-	gk20a_mm_l2_flush(c->g, true);
 
 	inst_ptr = c->inst_block.cpuva;
 	if (!inst_ptr)
@@ -556,8 +554,6 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
 		ram_in_gr_wfi_ptr_hi_f(addr_hi));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -600,8 +596,6 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 
 	vunmap(ch_ctx->patch_ctx.cpu_va);
 	ch_ctx->patch_ctx.cpu_va = NULL;
-
-	gk20a_mm_l2_invalidate(g);
 	return 0;
 }
 
@@ -718,10 +712,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 		}
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
 		ch_ctx->zcull_ctx.ctx_sw_mode);
@@ -736,7 +727,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 			goto clean_up;
 		}
 	}
-	gk20a_mm_l2_invalidate(g);
 
 clean_up:
 	vunmap(ctx_ptr);
@@ -1466,10 +1456,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush before cpu read. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, false);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1504,8 +1491,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 
 	gr->ctx_vars.golden_image_initialized = true;
 
-	gk20a_mm_l2_invalidate(g);
-
 	gk20a_writel(g, gr_fecs_current_ctx_r(),
 		gr_fecs_current_ctx_valid_false_f());
 
@@ -1537,7 +1522,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
@@ -1555,8 +1539,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	vunmap(ctx_ptr);
 
-	gk20a_mm_l2_invalidate(g);
-
 	return 0;
 }
 
@@ -1580,7 +1562,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
@@ -1636,8 +1617,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	vunmap(ctx_ptr);
 
-	gk20a_mm_l2_invalidate(g);
-
 	if (tegra_platform_is_linsim()) {
 		u32 inst_base_ptr =
 			u64_lo32(c->inst_block.cpu_pa
@@ -2716,7 +2695,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		}
 		c->first_init = true;
 	}
-	gk20a_mm_l2_invalidate(g);
 
 	c->num_objects++;
 
@@ -4223,8 +4201,6 @@ restore_fe_go_idle:
 				sw_method_init->l[i].addr);
 	}
 
-	gk20a_mm_l2_invalidate(g);
-
 	err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
 	if (err)
 		goto out;
@@ -5797,10 +5773,6 @@ int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 
 			/* we're not caching these on cpu side,
 			   but later watch for it */
-
-			/* the l2 invalidate in the patch_write
-			 * would be too early for this? */
-			gk20a_mm_l2_invalidate(g);
 			return 0;
 		}
 	}
@@ -6538,10 +6510,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			goto cleanup;
 		}
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate.
-	 * There should be no on-going/in-flight references by the gpu now. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	/* write to appropriate place in context image,
 	 * first have to figure out where that really is */
@@ -1937,8 +1937,6 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 		vaddr += pgsz;
 	}
 
-	gk20a_mm_l2_flush(mm->g, true);
-
 	return 0;
 
 err_unmap: