From 96dc116eedd7b8e38e9235b9d44ed21f7fd86e93 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 17 Sep 2020 18:18:23 +0530 Subject: [PATCH] gpu: nvgpu: support context creation for specific GR instance Get current GR instance pointer with nvgpu_gr_get_cur_instance_ptr() in nvgpu_gr_setup_alloc_obj_ctx() and update all the code in this function to use this GR instance pointer instead of globally accessing g->gr->* data structures. Add lots of GR engine specific debug prints in context creation path. Jira NVGPU-5648 Change-Id: Ia8681d115ee88c5848621854f23e1cce4ff3deb2 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2415239 Reviewed-by: automaticguardword Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Lakshmanan M Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit Tested-by: Lakshmanan M Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/ctx.c | 13 +++-- drivers/gpu/nvgpu/common/gr/global_ctx.c | 2 + drivers/gpu/nvgpu/common/gr/gr_setup.c | 15 +++-- drivers/gpu/nvgpu/common/gr/gr_utils.c | 3 +- drivers/gpu/nvgpu/common/gr/obj_ctx.c | 57 +++++++++++++------ drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c | 2 +- 6 files changed, 63 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index c71c23c54..c4ddf3a1f 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -144,7 +144,7 @@ int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; int err = 0; - nvgpu_log(g, gpu_dbg_info, "patch buffer size in entries: %d", + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "patch_ctx size = %u", gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX]); err = nvgpu_dma_alloc_map_sys(vm, gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX], @@ -353,7 +353,7 @@ int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, { int err; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); /* Circular Buffer */ err = nvgpu_gr_ctx_map_ctx_circular_buffer(g, gr_ctx, @@ -420,6 +420,7 @@ int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, gr_ctx->global_ctx_buffer_mapped = true; + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; fail: @@ -473,7 +474,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, u64 virt_addr = 0; #endif - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); mem = &gr_ctx->mem; @@ -506,7 +507,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, } #endif - nvgpu_log(g, gpu_dbg_info, "write patch count = %d", + nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "write patch count = %d", gr_ctx->patch_ctx.data_count); g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); @@ -522,6 +523,8 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode); g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr); #endif + + nvgpu_log(g, gpu_dbg_gr, "done"); } /* @@ -724,6 +727,8 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { int err; + nvgpu_log(g, gpu_dbg_gr, " "); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c index efd9cc7a7..7480cc179 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -426,6 +426,8 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g, nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context, nvgpu_safe_cast_u64_to_u32(local_golden_image->size)); + + nvgpu_log(g, gpu_dbg_gr, "loaded saved golden image into gr_ctx"); } void nvgpu_gr_global_ctx_deinit_local_golden_image(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c index cc7a67fe4..edbdf0caf 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_setup.c +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -29,6 +29,7 @@ #include #endif #include +#include #include #include @@ -144,8 +145,11 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_tsg *tsg = NULL; int err = 0; + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, + "GR%u: allocate object context for channel %u", + gr->instance_id, c->chid); err = nvgpu_gr_setup_validate_channel_and_class(g, c, class_num); if (err != 0) { @@ -172,7 +176,6 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, gr_ctx = tsg->gr_ctx; - err = nvgpu_gr_setup_alloc_subctx(g, c); if (err != 0) { nvgpu_err(g, "failed to allocate gr subctx buffer"); @@ -183,9 +186,9 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, tsg->vm = c->vm; nvgpu_vm_get(tsg->vm); - err = nvgpu_gr_obj_ctx_alloc(g, g->gr->golden_image, - g->gr->global_ctx_buffer, g->gr->gr_ctx_desc, - g->gr->config, gr_ctx, c->subctx, + err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, + gr->global_ctx_buffer, gr->gr_ctx_desc, + gr->config, gr_ctx, c->subctx, tsg->vm, &c->inst_block, class_num, flags, c->cde, c->vpr); if (err != 0) { @@ -214,7 +217,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, } #endif - nvgpu_log_fn(g, "done"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; out: if (c->subctx != NULL) { diff --git a/drivers/gpu/nvgpu/common/gr/gr_utils.c b/drivers/gpu/nvgpu/common/gr/gr_utils.c index 34119ed99..b5d7a7680 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_utils.c +++ b/drivers/gpu/nvgpu/common/gr/gr_utils.c @@ -87,7 +87,8 @@ struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g) struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr( struct gk20a *g) { - return g->gr->global_ctx_buffer; + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + return gr->global_ctx_buffer; } #endif diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index 93ad03c55..48b0b31dd 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -52,7 +52,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, { struct nvgpu_mem *ctxheader; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); @@ -63,6 +63,8 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, } else { nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va); } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); } #if defined(CONFIG_NVGPU_GRAPHICS) || defined(CONFIG_NVGPU_CILP) @@ -75,7 +77,7 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, u32 graphics_preempt_mode = 0U; u32 compute_preempt_mode = 0U; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); #ifdef CONFIG_NVGPU_GRAPHICS if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) { @@ -107,7 +109,7 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, } } - nvgpu_log_fn(g, "done"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; } @@ -255,18 +257,18 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_mem *mem; #endif - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx); #ifdef CONFIG_NVGPU_GRAPHICS if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) { - return; + goto done; } if (!nvgpu_mem_is_valid( nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) { - return; + goto done; } if (subctx != NULL) { @@ -311,8 +313,9 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); +done: #endif - nvgpu_log_fn(g, "done"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); } void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, @@ -322,7 +325,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, u64 addr; u32 size; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); if (patch) { nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false); @@ -382,6 +385,8 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, if (patch) { nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); } static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g) @@ -445,6 +450,8 @@ static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g, struct netlist_aiv_list *sw_ctx_load = nvgpu_netlist_get_sw_ctx_load_aiv_list(g); + nvgpu_log(g, gpu_dbg_gr, " "); + err = g->ops.gr.init.fe_pwr_mode_force_on(g, true); if (err != 0) { goto clean_up; @@ -491,6 +498,9 @@ static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g, err = g->ops.gr.init.wait_idle(g); clean_up: + if (err == 0) { + nvgpu_log(g, gpu_dbg_gr, "done"); + } return err; } @@ -506,6 +516,8 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, nvgpu_netlist_get_sw_bundle_init_av_list(g); #endif + nvgpu_log(g, gpu_dbg_gr, " "); + /* disable fe_go_idle */ g->ops.gr.init.fe_go_idle_timeout(g, false); @@ -560,6 +572,7 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, goto clean_up; } + nvgpu_log(g, gpu_dbg_gr, "done"); return 0; restore_fe_go_idle: @@ -588,6 +601,8 @@ static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g, NULL; #endif + nvgpu_log(g, gpu_dbg_gr, " "); + gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); #ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION @@ -666,6 +681,10 @@ clean_up: local_golden_image_temp); } #endif + + if (err == 0) { + nvgpu_log(g, gpu_dbg_gr, "golden image saved with size = %llu", size); + } return err; } @@ -682,8 +701,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, { int err = 0; - nvgpu_log_fn(g, " "); - + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); /* * golden ctx is global to all channels. Although only the first @@ -693,6 +711,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, nvgpu_mutex_acquire(&golden_image->ctx_mutex); if (golden_image->ready) { + nvgpu_log(g, gpu_dbg_gr, "golden image already saved"); goto clean_up; } @@ -708,14 +727,16 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, } #ifdef CONFIG_NVGPU_GRAPHICS - err = nvgpu_gr_ctx_init_zcull(g, gr_ctx); - if (err != 0) { - goto clean_up; + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + err = nvgpu_gr_ctx_init_zcull(g, gr_ctx); + if (err != 0) { + goto clean_up; + } } #endif err = nvgpu_gr_obj_ctx_save_golden_ctx(g, golden_image, - gr_ctx, inst_block); + gr_ctx, inst_block); if (err != 0) { goto clean_up; } @@ -730,7 +751,7 @@ clean_up: if (err != 0) { nvgpu_err(g, "fail"); } else { - nvgpu_log_fn(g, "done"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); } nvgpu_mutex_release(&golden_image->ctx_mutex); @@ -751,6 +772,7 @@ static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g, nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX, nvgpu_safe_cast_u64_to_u32(size)); + nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size); err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm); if (err != 0) { return err; @@ -773,7 +795,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, { int err = 0; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc, gr_ctx, vm); @@ -830,6 +852,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, nvgpu_err(g, "fail to init golden ctx image"); goto out; } + #ifdef CONFIG_NVGPU_POWER_PG /* Re-enable ELPG now that golden image has been initialized. * The PMU PG init code may already have tried to enable elpg, but @@ -850,7 +873,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, subctx); - nvgpu_log_fn(g, "done"); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; out: /* diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 0b27345bc..2a6ffa4fa 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -194,7 +194,7 @@ int gv11b_gr_init_preemption_state(struct gk20a *g) { u32 debug_2; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); debug_2 = nvgpu_readl(g, gr_debug_2_r()); debug_2 = set_field(debug_2,