gpu: nvgpu: don't call nvgpu_mem_{begin,end}() for gr buffers

Now that GR buffers always have a kernel mapping, remove the unnecessary
calls to nvgpu_mem_begin() and nvgpu_mem_end() on these buffers:

- global ctx buffer mem in gr
- gr ctx mem in a tsg
- patch ctx mem in a gr ctx
- pm ctx mem in a gr ctx
- ctx_header mem in a channel (subctx header)

Change-Id: Id2a8ad108aef8db8b16dce5bae8003bbcd3b23e4
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760599
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2018-06-25 15:47:15 +03:00
committed by mobile promotions
parent 2dda362e63
commit dd146d42fc
5 changed files with 7 additions and 162 deletions

View File

@@ -657,9 +657,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
mem = &ch_ctx->mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
lo, GK20A_FECS_TRACE_NUM_RECORDS);
@@ -668,14 +665,9 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
nvgpu_mem_end(g, mem);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
mem = &ch->ctx_header.mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
lo);
@@ -684,8 +676,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
aperture_mask);
nvgpu_mem_end(g, mem);
/* pid (process identifier) in user space, corresponds to tgid (thread
* group id) in kernel space.
*/

View File

@@ -111,15 +111,10 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
*ctx_id = nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_context_id_o());
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", *ctx_id);
nvgpu_mem_end(g, mem);
return 0;
}
@@ -696,12 +691,6 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
bool update_patch_count)
{
int err = 0;
err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
if (err)
return err;
if (update_patch_count) {
/* reset patch count if ucode has already processed it */
gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
@@ -717,8 +706,6 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
bool update_patch_count)
{
nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
/* Write context count to context image if it is mapped */
if (update_patch_count) {
nvgpu_mem_wr(g, &gr_ctx->mem,
@@ -832,31 +819,22 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
gr_ctx = &tsg->gr_ctx;
mem = &gr_ctx->mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
if (nvgpu_mem_begin(g, ctxheader)) {
ret = -ENOMEM;
goto clean_up_mem;
}
if (gr_ctx->zcull_ctx.gpu_va == 0 &&
gr_ctx->zcull_ctx.ctx_sw_mode ==
ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
ret = -EINVAL;
goto clean_up;
return -EINVAL;
}
ret = gk20a_disable_channel_tsg(g, c);
if (ret) {
nvgpu_err(g, "failed to disable channel/TSG");
goto clean_up;
return ret;
}
ret = gk20a_fifo_preempt(g, c);
if (ret) {
gk20a_enable_channel_tsg(g, c);
nvgpu_err(g, "failed to preempt channel/TSG");
goto clean_up;
return ret;
}
nvgpu_mem_wr(g, mem,
@@ -871,11 +849,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
gk20a_enable_channel_tsg(g, c);
clean_up:
nvgpu_mem_end(g, ctxheader);
clean_up_mem:
nvgpu_mem_end(g, mem);
return ret;
}
@@ -1597,12 +1570,6 @@ restore_fe_go_idle:
goto restore_fe_go_idle;
}
if (nvgpu_mem_begin(g, gold_mem))
goto clean_up;
if (nvgpu_mem_begin(g, gr_mem))
goto clean_up;
ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
ctx_header_words >>= 2;
@@ -1655,9 +1622,6 @@ clean_up:
else
nvgpu_log_fn(g, "done");
nvgpu_mem_end(g, gold_mem);
nvgpu_mem_end(g, gr_mem);
nvgpu_mutex_release(&gr->ctx_mutex);
return err;
}
@@ -1701,11 +1665,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
if (nvgpu_mem_begin(g, mem)) {
ret = -ENOMEM;
goto out;
}
data = nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_pm_o());
@@ -1717,7 +1676,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_pm_o(), data);
nvgpu_mem_end(g, mem);
out:
gk20a_enable_channel_tsg(g, c);
return ret;
@@ -1807,24 +1765,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
}
/* Now clear the buffer */
if (nvgpu_mem_begin(g, &pm_ctx->mem)) {
ret = -ENOMEM;
goto cleanup_pm_buf;
}
nvgpu_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
nvgpu_mem_end(g, &pm_ctx->mem);
}
if (nvgpu_mem_begin(g, gr_mem)) {
ret = -ENOMEM;
goto cleanup_pm_buf;
}
if (nvgpu_mem_begin(g, ctxheader)) {
ret = -ENOMEM;
goto clean_up_mem;
}
data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
@@ -1848,22 +1789,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
else
g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
nvgpu_mem_end(g, ctxheader);
nvgpu_mem_end(g, gr_mem);
/* enable channel */
gk20a_enable_channel_tsg(g, c);
return 0;
clean_up_mem:
nvgpu_mem_end(g, gr_mem);
cleanup_pm_buf:
nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
nvgpu_dma_free(g, &pm_ctx->mem);
memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem));
gk20a_enable_channel_tsg(g, c);
return ret;
}
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
@@ -1904,9 +1833,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
nvgpu_mem_wr_n(g, mem, 0,
gr->ctx_vars.local_golden_image,
gr->ctx_vars.golden_image_size);
@@ -1973,7 +1899,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
nvgpu_err(g,
"context switched pm with no pm buffer!");
nvgpu_mem_end(g, mem);
return -EFAULT;
}
@@ -1989,8 +1914,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
g->ops.gr.write_pm_ptr(g, mem, virt_addr);
nvgpu_mem_end(g, mem);
return ret;
}
@@ -4840,12 +4763,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
u32 *whitelist = NULL;
int w, num_entries = 0;
if (nvgpu_mem_begin(g, mem)) {
nvgpu_err(g,
"failed to map priv access map memory");
return -ENOMEM;
}
nvgpu_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
g->ops.gr.get_access_map(g, &whitelist, &num_entries);
@@ -4864,7 +4781,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
nvgpu_mem_wr32(g, mem, map_byte / sizeof(u32), x);
}
nvgpu_mem_end(g, mem);
return 0;
}
@@ -6758,22 +6674,12 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
ctxsw_prog_main_image_patch_count_o(),
gr_ctx->patch_ctx.data_count);
if (ctxheader->gpu_va) {
/*
* Main context can be gr_ctx or pm_ctx.
* CPU access for relevant ctx is taken
* care of in the calling function
* __gr_gk20a_exec_ctx_ops. Need to take
* care of cpu access to ctxheader here.
*/
if (nvgpu_mem_begin(g, ctxheader))
return -ENOMEM;
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_lo_o(),
vaddr_lo);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_hi_o(),
vaddr_hi);
nvgpu_mem_end(g, ctxheader);
} else {
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_adr_lo_o(),
@@ -8038,17 +7944,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
ctx_ops[i].quad);
if (!err) {
if (!gr_ctx_ready) {
/* would have been a variant of
* gr_gk20a_apply_instmem_overrides,
* recoded in-place instead.
*/
if (nvgpu_mem_begin(g, &gr_ctx->mem)) {
err = -ENOMEM;
goto cleanup;
}
if (!gr_ctx_ready)
gr_ctx_ready = true;
}
current_mem = &gr_ctx->mem;
} else {
err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
@@ -8072,10 +7969,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
err = -EINVAL;
goto cleanup;
}
if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) {
err = -ENOMEM;
goto cleanup;
}
pm_ctx_ready = true;
}
current_mem = &gr_ctx->pm_ctx.mem;
@@ -8148,10 +8041,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
if (gr_ctx->patch_ctx.mem.cpu_va)
gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
if (gr_ctx_ready)
nvgpu_mem_end(g, &gr_ctx->mem);
if (pm_ctx_ready)
nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem);
return err;
}

View File

@@ -1056,16 +1056,11 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
return -EINVAL;
if (nvgpu_mem_begin(c->g, mem))
return -ENOMEM;
v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
nvgpu_mem_end(c->g, mem);
nvgpu_log_fn(c->g, "done");
return 0;

View File

@@ -1115,10 +1115,6 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
{
struct nvgpu_mem *mem = &gr_ctx->mem;
if (nvgpu_mem_begin(g, mem)) {
WARN_ON("Cannot map context");
return;
}
nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_magic_value_o()),
@@ -1159,7 +1155,6 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
"image compute preemption option (CTA is 1) %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o()));
nvgpu_mem_end(g, mem);
}
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
@@ -2175,12 +2170,9 @@ int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
gr_ctx->boosted_ctx = boost;
mem = &gr_ctx->mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
err = gk20a_disable_channel_tsg(g, ch);
if (err)
goto unmap_ctx;
return err;
err = gk20a_fifo_preempt(g, ch);
if (err)
@@ -2193,8 +2185,6 @@ int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
enable_ch:
gk20a_enable_channel_tsg(g, ch);
unmap_ctx:
nvgpu_mem_end(g, mem);
return err;
}
@@ -2217,8 +2207,6 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
struct tsg_gk20a *tsg;
struct vm_gk20a *vm;
struct nvgpu_mem *mem;
struct ctx_header_desc *ctx = &ch->ctx_header;
struct nvgpu_mem *ctxheader = &ctx->mem;
u32 class;
int err = 0;
@@ -2263,15 +2251,9 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
}
}
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
if (nvgpu_mem_begin(g, ctxheader))
goto unamp_ctx_header;
err = gk20a_disable_channel_tsg(g, ch);
if (err)
goto unmap_ctx;
return err;
err = gk20a_fifo_preempt(g, ch);
if (err)
@@ -2292,11 +2274,6 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
enable_ch:
gk20a_enable_channel_tsg(g, ch);
unmap_ctx:
nvgpu_mem_end(g, ctxheader);
unamp_ctx_header:
nvgpu_mem_end(g, mem);
return err;
}

View File

@@ -82,11 +82,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
return -ENOMEM;
}
/* Now clear the buffer */
if (nvgpu_mem_begin(g, &ctx->mem))
return -ENOMEM;
nvgpu_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
nvgpu_mem_end(g, &ctx->mem);
}
return ret;
}
@@ -117,8 +113,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
gr_mem = &ctx->mem;
g->ops.mm.l2_flush(g, true);
if (nvgpu_mem_begin(g, gr_mem))
return -ENOMEM;
/* set priv access map */
addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -153,7 +147,7 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
nvgpu_mem_wr(g, gr_mem,
ctxsw_prog_main_image_ctl_o(),
ctxsw_prog_main_image_ctl_type_per_veid_header_v());
nvgpu_mem_end(g, gr_mem);
return ret;
}