/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include
#include
#include
#include

#ifdef CONFIG_NVGPU_POWER_PG
#include
#include
#endif

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "obj_ctx_priv.h"

void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
	struct nvgpu_mem *inst_block, u64 gpu_va)
{
	g->ops.ramin.set_gr_ptr(g, inst_block, gpu_va);
}

void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va)
{
	struct nvgpu_mem *ctxheader;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
		nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va);

		ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx);
		nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block,
			ctxheader->gpu_va);
	} else {
		nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va);
	}

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
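
/*
 * Select the initial graphics and compute preemption modes for a new
 * context. GfxP/CILP are chosen when the caller passes the corresponding
 * NVGPU_OBJ_CTX_FLAGS_SUPPORT_* flag or when forced through the context
 * descriptor; any mode still left at zero falls back to the per-chip
 * defaults reported by get_default_preemption_modes().
 */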
#if defined(CONFIG_NVGPU_GRAPHICS) || defined(CONFIG_NVGPU_CILP)
static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_config *config,
	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	struct nvgpu_gr_ctx *gr_ctx,
	struct vm_gk20a *vm,
	u32 class_num, u32 flags)
{
	int err;
	u32 graphics_preempt_mode = 0U;
	u32 compute_preempt_mode = 0U;
	u32 default_graphics_preempt_mode = 0U;
	u32 default_compute_preempt_mode = 0U;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	/* Skip for engines other than GR */
	if (!g->ops.gpu_class.is_valid_compute(class_num) &&
	    !g->ops.gpu_class.is_valid_gfx(class_num)) {
		return 0;
	}

	g->ops.gr.init.get_default_preemption_modes(
		&default_graphics_preempt_mode, &default_compute_preempt_mode);

#ifdef CONFIG_NVGPU_GRAPHICS
	if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) {
		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
	}

	if (g->ops.gpu_class.is_valid_gfx(class_num) &&
	    nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) {
		graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
	}

	if (graphics_preempt_mode == 0U) {
		graphics_preempt_mode = default_graphics_preempt_mode;
	}
#endif

#ifdef CONFIG_NVGPU_CILP
	if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) {
		compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
	}

	if (g->ops.gpu_class.is_valid_compute(class_num) &&
	    nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) {
		compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
	}
#endif

	if (compute_preempt_mode == 0U) {
		compute_preempt_mode = default_compute_preempt_mode;
	}

	err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config,
		gr_ctx_desc, gr_ctx, vm, class_num,
		graphics_preempt_mode, compute_preempt_mode);
	if (err != 0) {
		nvgpu_err(g, "set_ctxsw_preemption_mode failed");
		return err;
	}

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
	return 0;
}
#endif

#ifdef CONFIG_NVGPU_GRAPHICS
static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_config *config,
	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	struct nvgpu_gr_ctx *gr_ctx,
	struct vm_gk20a *vm,
	u32 graphics_preempt_mode)
{
	int err = 0;

	/* set preemption modes */
	switch (graphics_preempt_mode) {
	case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
	{
		u32 rtv_cb_size;
		u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g);
		u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g);
		u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g);
		u32 attrib_cb_size =
			g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size,
				nvgpu_gr_config_get_tpc_count(config),
				nvgpu_gr_config_get_max_tpc_count(config));

		nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
		nvgpu_log_info(g, "gfxp context pagepool_size=%d",
			pagepool_size);
		nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
			attrib_cb_size);

		nvgpu_gr_ctx_set_size(gr_ctx_desc,
			NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
		nvgpu_gr_ctx_set_size(gr_ctx_desc,
			NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size);
		nvgpu_gr_ctx_set_size(gr_ctx_desc,
			NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size);

		if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) {
			rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g);
			nvgpu_gr_ctx_set_size(gr_ctx_desc,
				NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
		}

		err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
			gr_ctx_desc, vm);
		if (err != 0) {
			nvgpu_err(g, "cannot allocate ctxsw buffers");
			goto fail;
		}

		nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
			graphics_preempt_mode);
		break;
	}

	case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
		nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
			graphics_preempt_mode);
		break;

	default:
		nvgpu_log_info(g, "graphics_preempt_mode=%u",
			graphics_preempt_mode);
		break;
	}

fail:
	return err;
}
#endif

static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 compute_preempt_mode)
{
	if (g->ops.gpu_class.is_valid_compute(class_num)
#ifdef CONFIG_NVGPU_GRAPHICS
	    || g->ops.gpu_class.is_valid_gfx(class_num)
#endif
	   ) {
		nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
			compute_preempt_mode);
		return 0;
	} else {
		return -EINVAL;
	}
}

int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_config *config,
	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	struct nvgpu_gr_ctx *gr_ctx,
	struct vm_gk20a *vm, u32 class_num,
	u32 graphics_preempt_mode, u32 compute_preempt_mode)
{
	int err = 0;

	/* check for invalid combinations */
	if (nvgpu_gr_ctx_check_valid_preemption_mode(g, gr_ctx,
		graphics_preempt_mode, compute_preempt_mode) == false) {
		err = -EINVAL;
		goto fail;
	}

	nvgpu_log(g, gpu_dbg_gr,
		"graphics_preempt_mode=%u compute_preempt_mode=%u",
		graphics_preempt_mode, compute_preempt_mode);

#ifdef CONFIG_NVGPU_GRAPHICS
	err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config,
		gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode);
	if (err != 0) {
		goto fail;
	}
#endif

	err = nvgpu_gr_obj_ctx_set_compute_preemption_mode(g, gr_ctx,
		class_num, compute_preempt_mode);

fail:
	return err;
}
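
/*
 * Program the selected preemption modes into the context image. For a
 * GfxP context this also writes the preemption buffer VA (per-subcontext
 * when subcontexts are in use) and patches in the betacb, pagepool and
 * spill ctxsw buffer addresses, the CBES reserve, and the optional WFI
 * timeout and GfxP RTV circular buffer HAL hooks.
 */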
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
	struct nvgpu_gr_subctx *subctx)
{
#ifdef CONFIG_NVGPU_GRAPHICS
	u64 addr;
	u32 size;
	struct nvgpu_mem *mem;
#endif

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);

#ifdef CONFIG_NVGPU_GRAPHICS
	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
		goto done;
	}

	if (!nvgpu_mem_is_valid(
			nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) {
		goto done;
	}

	if (subctx != NULL) {
		nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, gr_ctx);
	} else {
		nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx);
	}

	nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);

	addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va;
	g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
		nvgpu_gr_config_get_tpc_count(config),
		nvgpu_gr_config_get_max_tpc_count(config), addr, true);

	mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx);
	addr = mem->gpu_va;
	nvgpu_assert(mem->size <= U32_MAX);
	size = (u32)mem->size;
	g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
		true, false);

	mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx);
	addr = mem->gpu_va;
	nvgpu_assert(mem->size <= U32_MAX);
	size = (u32)mem->size;
	g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true);

	g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true);

	if (g->ops.gr.init.gfxp_wfi_timeout != NULL) {
		g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx, true);
	}

	if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
		g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true);
	}

	nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);

done:
#endif
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
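
/*
 * Commit the global context buffer addresses (pagepool, bundle CB,
 * attribute CB, CB manager setup and, on dGPU, the RTV circular buffer)
 * into the context image. MIG supports only the compute class, so these
 * graphics buffers are skipped there and only the optional SM id
 * programming below is applied.
 */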
void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch)
{
	u64 addr;
	u32 size;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	if (patch) {
		nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
	}

	/*
	 * MIG supports only compute class.
	 * Skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
	 * if 2D/3D/I2M classes(graphics) are not supported.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
		/* global pagepool buffer */
		addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
			NVGPU_GR_CTX_PAGEPOOL_VA);
		size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size(
			global_ctx_buffer, NVGPU_GR_GLOBAL_CTX_PAGEPOOL));

		g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
			patch, true);

		/* global bundle cb */
		addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
			NVGPU_GR_CTX_CIRCULAR_VA);
		size = nvgpu_safe_cast_u64_to_u32(
			g->ops.gr.init.get_bundle_cb_default_size(g));

		g->ops.gr.init.commit_global_bundle_cb(g, gr_ctx, addr, size,
			patch);

		/* global attrib cb */
		addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
			NVGPU_GR_CTX_ATTRIBUTE_VA);

		g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
			nvgpu_gr_config_get_tpc_count(config),
			nvgpu_gr_config_get_max_tpc_count(config), addr, patch);

		g->ops.gr.init.commit_global_cb_manager(g, config, gr_ctx,
			patch);

#ifdef CONFIG_NVGPU_DGPU
		if (g->ops.gr.init.commit_rtv_cb != NULL) {
			/* RTV circular buffer */
			addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
				NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA);

			g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch);
		}
#endif
	}

#ifdef CONFIG_NVGPU_SM_DIVERSITY
	if ((nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) &&
		(nvgpu_gr_ctx_get_sm_diversity_config(gr_ctx) !=
			NVGPU_DEFAULT_SM_DIVERSITY_CONFIG) &&
		(g->ops.gr.init.commit_sm_id_programming != NULL)) {
		int err;

		err = g->ops.gr.init.commit_sm_id_programming(
				g, config, gr_ctx, patch);
		if (err != 0) {
			nvgpu_err(g,
				"commit_sm_id_programming failed err=%d", err);
		}
	}
#endif

	if (patch) {
		nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
	}

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}

static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g)
{
	int err = 0;
	struct netlist_av_list *sw_bundle_init =
		nvgpu_netlist_get_sw_bundle_init_av_list(g);
	struct netlist_av_list *sw_veid_bundle_init =
		nvgpu_netlist_get_sw_veid_bundle_init_av_list(g);
#ifdef CONFIG_NVGPU_DGPU
	struct netlist_av64_list *sw_bundle64_init =
		nvgpu_netlist_get_sw_bundle64_init_av64_list(g);
#endif

	/* enable pipe mode override */
	g->ops.gr.init.pipe_mode_override(g, true);

	/* load bundle init */
	err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init);
	if (err != 0) {
		goto error;
	}

	if (g->ops.gr.init.load_sw_veid_bundle != NULL) {
		err = g->ops.gr.init.load_sw_veid_bundle(g,
			sw_veid_bundle_init);
		if (err != 0) {
			goto error;
		}
	}

#ifdef CONFIG_NVGPU_DGPU
	if (g->ops.gr.init.load_sw_bundle64 != NULL) {
		err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init);
		if (err != 0) {
			goto error;
		}
	}
#endif

	/* disable pipe mode override */
	g->ops.gr.init.pipe_mode_override(g, false);
	err = g->ops.gr.init.wait_idle(g);
	return err;

error:
	/* in case of error skip waiting for GR idle - just restore state */
	g->ops.gr.init.pipe_mode_override(g, false);
	return err;
}
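
/*
 * Bring GR to a known state before the golden context is captured:
 * temporarily force FE power on, reset the context state, bind the
 * instance block to FECS, replay the netlist sw_ctx_load register list
 * and configure HWW ESR reporting, then wait for the engine to idle.
 */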
static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g,
	struct nvgpu_mem *inst_block)
{
	int err = 0;
	u32 data;
	u32 i;
	struct netlist_aiv_list *sw_ctx_load =
		nvgpu_netlist_get_sw_ctx_load_aiv_list(g);

	nvgpu_log(g, gpu_dbg_gr, " ");

	err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
	if (err != 0) {
		goto clean_up;
	}

	g->ops.gr.init.override_context_reset(g);

	err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
	if (err != 0) {
		goto clean_up;
	}

	data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
	err = g->ops.gr.falcon.ctrl_ctxsw(g,
		NVGPU_GR_FALCON_METHOD_ADDRESS_BIND_PTR, data, NULL);
	if (err != 0) {
		goto clean_up;
	}

	err = g->ops.gr.init.wait_idle(g);

	/* load ctx init */
	nvgpu_log_info(g, "begin: netlist: sw_ctx_load: register writes");
	for (i = 0U; i < sw_ctx_load->count; i++) {
		nvgpu_writel(g, sw_ctx_load->l[i].addr,
			sw_ctx_load->l[i].value);
	}
	nvgpu_log_info(g, "end: netlist: sw_ctx_load: register writes");

	nvgpu_log_info(g, "configure sm_hww_esr_report mask after sw_ctx_load");
	g->ops.gr.intr.set_hww_esr_report_mask(g);

#ifdef CONFIG_NVGPU_GRAPHICS
	if (g->ops.gr.init.preemption_state != NULL) {
		err = g->ops.gr.init.preemption_state(g);
		if (err != 0) {
			goto clean_up;
		}
	}
#endif

	nvgpu_cg_blcg_gr_load_enable(g);

	err = g->ops.gr.init.wait_idle(g);

clean_up:
	if (err == 0) {
		nvgpu_log(g, gpu_dbg_gr, "done");
	}
	return err;
}

static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g,
	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx)
{
	int err = 0;
	struct netlist_av_list *sw_method_init =
		nvgpu_netlist_get_sw_method_init_av_list(g);
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	struct netlist_av_list *sw_bundle_init =
		nvgpu_netlist_get_sw_bundle_init_av_list(g);
#endif

	nvgpu_log(g, gpu_dbg_gr, " ");

	/* disable fe_go_idle */
	g->ops.gr.init.fe_go_idle_timeout(g, false);

	nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
		config, gr_ctx, false);

	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
		/* override a few ctx state registers */
		g->ops.gr.init.commit_global_timeslice(g);
	}

	/* floorsweep anything left */
	err = nvgpu_gr_fs_state_init(g, config);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
	if (g->ops.gr.init.auto_go_idle != NULL) {
		g->ops.gr.init.auto_go_idle(g, false);
	}
#endif

	err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g);
	if (err != 0) {
		goto restore_fe_go_idle;
	}

#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
	if (g->ops.gr.init.auto_go_idle != NULL) {
		g->ops.gr.init.auto_go_idle(g, true);
	}
#endif

	/* restore fe_go_idle */
	g->ops.gr.init.fe_go_idle_timeout(g, true);

	/* load method init */
	g->ops.gr.init.load_method_init(g, sw_method_init);

#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	/* restore stats bundle data through mme shadow methods */
	if (g->ops.gr.init.restore_stats_counter_bundle_data != NULL) {
		g->ops.gr.init.restore_stats_counter_bundle_data(g,
			sw_bundle_init);
	}
#endif

	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}

	nvgpu_log(g, gpu_dbg_gr, "done");
	return 0;

restore_fe_go_idle:
	/* restore fe_go_idle */
	g->ops.gr.init.fe_go_idle_timeout(g, true);

#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
	if (g->ops.gr.init.auto_go_idle != NULL) {
		g->ops.gr.init.auto_go_idle(g, true);
	}
#endif

clean_up:
	return err;
}
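
/*
 * Ask FECS to save the current context as the golden image and keep a
 * CPU accessible copy of it in golden_image->local_golden_image. With
 * golden context verification enabled, the save is performed twice from
 * the same initial context state and the two captures are compared to
 * detect a non-deterministic golden image.
 */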
static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *inst_block)
{
	int err = 0;
	struct nvgpu_mem *gr_mem;
	u64 size;
	u32 data;
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	struct nvgpu_gr_global_ctx_local_golden_image
		*local_golden_image_temp = NULL;
#endif

	nvgpu_log(g, gpu_dbg_gr, " ");

	gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);

#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	/*
	 * Save ctx data before first golden context save. Restore same data
	 * before second golden context save. This temporary copy is
	 * saved in local_golden_image_temp.
	 */
	size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
	local_golden_image_temp =
		nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
	if (local_golden_image_temp == NULL) {
		err = -ENOMEM;
		goto clean_up;
	}
#endif

	data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
	err = g->ops.gr.falcon.ctrl_ctxsw(g,
		NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
	if (err != 0) {
		goto clean_up;
	}

	size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
	golden_image->local_golden_image =
		nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
	if (golden_image->local_golden_image == NULL) {
		err = -ENOMEM;
		goto clean_up;
	}

#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	/* Before second golden context save restore to before known state */
	nvgpu_gr_global_ctx_load_local_golden_image(g,
		local_golden_image_temp, gr_mem);

	/* free local copy now */
	nvgpu_gr_global_ctx_deinit_local_golden_image(g,
		local_golden_image_temp);
	local_golden_image_temp = NULL;

	/* Initiate second golden context save */
	data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
	err = g->ops.gr.falcon.ctrl_ctxsw(g,
		NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
	if (err != 0) {
		goto clean_up;
	}

	/* Copy the data to local buffer */
	local_golden_image_temp =
		nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
	if (local_golden_image_temp == NULL) {
		err = -ENOMEM;
		goto clean_up;
	}

	/* Compare two golden context images */
	if (!nvgpu_gr_global_ctx_compare_golden_images(g,
			nvgpu_mem_is_sysmem(gr_mem),
			golden_image->local_golden_image,
			local_golden_image_temp, size)) {
		nvgpu_err(g, "golden context mismatch");
		err = -ENOMEM;
	}
#endif

clean_up:
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
	if (local_golden_image_temp != NULL) {
		nvgpu_gr_global_ctx_deinit_local_golden_image(g,
			local_golden_image_temp);
	}
#endif
	if (err == 0) {
		nvgpu_log(g, gpu_dbg_gr,
			"golden image saved with size = %llu", size);
	}
	return err;
}
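
/*
 * Golden context creation sequence: initialize GR HW state, commit the
 * HW state and global context buffers (plus zcull when graphics is
 * supported), then trigger the FECS golden image save. The resulting
 * image is cached and later loaded into every newly allocated gr_ctx.
 */
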
/*
 * init global golden image from a fresh gr_ctx in channel ctx.
 * save a copy in local_golden_image.
 */
int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
	struct nvgpu_gr_config *config,
	struct nvgpu_gr_ctx *gr_ctx,
	struct nvgpu_mem *inst_block)
{
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	/*
	 * golden ctx is global to all channels. Although only the first
	 * channel initializes golden image, driver needs to prevent multiple
	 * channels from initializing golden ctx at the same time
	 */
	nvgpu_mutex_acquire(&golden_image->ctx_mutex);

	if (golden_image->ready) {
		nvgpu_log(g, gpu_dbg_gr, "golden image already saved");
		goto clean_up;
	}

	err = nvgpu_gr_obj_ctx_init_hw_state(g, inst_block);
	if (err != 0) {
		goto clean_up;
	}

	err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, config,
		gr_ctx);
	if (err != 0) {
		goto clean_up;
	}

#ifdef CONFIG_NVGPU_GRAPHICS
	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
		err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
		if (err != 0) {
			goto clean_up;
		}
	}
#endif

	err = nvgpu_gr_obj_ctx_save_golden_ctx(g, golden_image, gr_ctx,
		inst_block);
	if (err != 0) {
		goto clean_up;
	}

	golden_image->ready = true;
#ifdef CONFIG_NVGPU_POWER_PG
	nvgpu_pmu_set_golden_image_initialized(g, true);
#endif
	g->ops.gr.falcon.set_current_ctx_invalid(g);

clean_up:
	if (err != 0) {
		nvgpu_err(g, "fail");
	} else {
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
	}

	nvgpu_mutex_release(&golden_image->ctx_mutex);
	return err;
}

static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	struct nvgpu_gr_ctx *gr_ctx,
	struct vm_gk20a *vm)
{
	u64 size;
	int err = 0;

	nvgpu_log_fn(g, " ");

	size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
	nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
		nvgpu_safe_cast_u64_to_u32(size));

	nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size);

	err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm);
	if (err != 0) {
		return err;
	}

	return 0;
}
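
/*
 * Top level object context setup for a TSG: allocate the gr_ctx buffer
 * (sized from the golden image) and the patch context, pick the ctxsw
 * preemption modes, map and commit the global context buffers, point the
 * instance block at the new context, then create the golden image (first
 * caller only) and load it before committing the preemption state.
 */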
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image,
	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
	struct nvgpu_gr_config *config,
	struct nvgpu_gr_ctx *gr_ctx,
	struct nvgpu_gr_subctx *subctx,
	struct vm_gk20a *vm,
	struct nvgpu_mem *inst_block,
	u32 class_num, u32 flags,
	bool cde, bool vpr)
{
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

	err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc,
		gr_ctx, vm);
	if (err != 0) {
		nvgpu_err(g, "fail to allocate TSG gr ctx buffer");
		goto out;
	}

	/* allocate patch buffer */
	if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) {
		nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);

		nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PATCH_CTX,
			nvgpu_safe_mult_u32(
				g->ops.gr.init.get_patch_slots(g, config),
				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY));

		err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm);
		if (err != 0) {
			nvgpu_err(g, "fail to allocate patch buffer");
			goto out;
		}
	}

#if defined(CONFIG_NVGPU_GRAPHICS) || defined(CONFIG_NVGPU_CILP)
	err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config,
		gr_ctx_desc, gr_ctx, vm, class_num, flags);
	if (err != 0) {
		nvgpu_err(g, "fail to init preemption mode");
		goto out;
	}
#endif

	/* map global buffer to channel gpu_va and commit */
	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
		global_ctx_buffer, vm, vpr);
	if (err != 0) {
		nvgpu_err(g, "fail to map global ctx buffer");
		goto out;
	}

	nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
		config, gr_ctx, true);

	/* commit gr ctx buffer */
	nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx,
		nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);

	/* init golden image */
	err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image,
		global_ctx_buffer, config, gr_ctx, inst_block);
	if (err != 0) {
		nvgpu_err(g, "fail to init golden ctx image");
		goto out;
	}

#ifdef CONFIG_NVGPU_POWER_PG
	/* Re-enable ELPG now that golden image has been initialized.
	 * The PMU PG init code may already have tried to enable elpg, but
	 * would not have been able to complete this action since the golden
	 * image hadn't been initialized yet, so do this now.
	 */
	err = nvgpu_pmu_reenable_elpg(g);
	if (err != 0) {
		nvgpu_err(g, "fail to re-enable elpg");
		goto out;
	}
#endif

	/* load golden image */
	nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
		golden_image->local_golden_image, cde);

	nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx,
		subctx);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
	return 0;

out:
	/*
	 * 1. gr_ctx, patch_ctx and global ctx buffer mapping
	 * can be reused so no need to release them.
	 * 2. golden image init and load is a one time thing so if
	 * they pass, no need to undo.
	 */
	nvgpu_err(g, "fail");
	return err;
}

void nvgpu_gr_obj_ctx_set_golden_image_size(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image, size_t size)
{
	golden_image->size = size;
}

size_t nvgpu_gr_obj_ctx_get_golden_image_size(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	return golden_image->size;
}

#ifdef CONFIG_NVGPU_DEBUGGER
u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	return nvgpu_gr_global_ctx_get_local_golden_image_ptr(
			golden_image->local_golden_image);
}
#endif

bool nvgpu_gr_obj_ctx_is_golden_image_ready(
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	bool ready;

	nvgpu_mutex_acquire(&golden_image->ctx_mutex);
	ready = golden_image->ready;
	nvgpu_mutex_release(&golden_image->ctx_mutex);

	return ready;
}

int nvgpu_gr_obj_ctx_init(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size)
{
	struct nvgpu_gr_obj_ctx_golden_image *golden_image;

	nvgpu_log(g, gpu_dbg_gr, "size = %u", size);

	golden_image = nvgpu_kzalloc(g, sizeof(*golden_image));
	if (golden_image == NULL) {
		return -ENOMEM;
	}

	nvgpu_gr_obj_ctx_set_golden_image_size(golden_image, size);
	nvgpu_mutex_init(&golden_image->ctx_mutex);

	*gr_golden_image = golden_image;

	return 0;
}

void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
	struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	if (golden_image == NULL) {
		return;
	}

	if (golden_image->local_golden_image != NULL) {
		nvgpu_gr_global_ctx_deinit_local_golden_image(g,
			golden_image->local_golden_image);
		golden_image->local_golden_image = NULL;
	}
#ifdef CONFIG_NVGPU_POWER_PG
	nvgpu_pmu_set_golden_image_initialized(g, false);
#endif
	golden_image->ready = false;

	nvgpu_kfree(g, golden_image);
}