diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c index 745286450..dfda2e681 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c @@ -68,7 +68,7 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) struct nvgpu_firmware *fecs_sig; struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img; struct nvgpu_ctxsw_ucode_segments *fecs = - nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon); + nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr->falcon); int err; fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0); @@ -110,7 +110,7 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) p_img->desc->app_resident_data_offset = fecs->data.offset - fecs->code.offset; p_img->desc->app_resident_data_size = fecs->data.size; - p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon); + p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr->falcon); p_img->data_size = p_img->desc->image_size; p_img->fw_ver = NULL; @@ -132,7 +132,7 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) struct nvgpu_firmware *gpccs_sig; struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img; struct nvgpu_ctxsw_ucode_segments *gpccs = - nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon); + nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr->falcon); int err; if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { @@ -179,7 +179,7 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img) ALIGN(gpccs->code.offset, 256); p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256); p_img->data = (u32 *) - ((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) + + ((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr->falcon) + gpccs->boot.offset); p_img->data_size = ALIGN(p_img->desc->image_size, 256); p_img->fw_ver = NULL; @@ -808,7 +808,7 @@ int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g) return err; } - err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon); + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err); return err; diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c index 264519abc..5a0eb14f5 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c @@ -76,7 +76,7 @@ int nvgpu_acr_lsf_fecs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img) struct flcn_ucode_img_v1 *p_img = (struct flcn_ucode_img_v1 *)lsf_ucode_img; struct nvgpu_ctxsw_ucode_segments *fecs = - nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon); + nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr->falcon); int err; switch (ver) { @@ -133,7 +133,7 @@ int nvgpu_acr_lsf_fecs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img) p_img->desc->app_resident_data_offset = fecs->data.offset - fecs->code.offset; p_img->desc->app_resident_data_size = fecs->data.size; - p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon); + p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr->falcon); p_img->data_size = p_img->desc->image_size; p_img->fw_ver = NULL; @@ -160,7 +160,7 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img) struct flcn_ucode_img_v1 *p_img = (struct flcn_ucode_img_v1 
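
The recurring change in this patch is that GR unit state moves from an embedded struct to a heap-allocated object: struct gr_gk20a is renamed to struct nvgpu_gr, struct gk20a carries a pointer to it, and every accessor changes from g->gr.field to g->gr->field. A minimal sketch of the assumed container change (the gk20a.h hunk is not part of this excerpt, so the exact declaration is inferred):

    struct nvgpu_gr;                     /* renamed from struct gr_gk20a */

    struct gk20a {
            /* ... */
            struct nvgpu_gr *gr;         /* was: struct gr_gk20a gr;
                                          * now allocated by nvgpu_gr_alloc() */
            /* ... */
    };

    /* accessor pattern after the change */
    err = nvgpu_gr_falcon_init_ctxsw(g, g->gr->falcon);   /* was: g->gr.falcon */
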
*)lsf_ucode_img; struct nvgpu_ctxsw_ucode_segments *gpccs = - nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon); + nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr->falcon); int err; if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { @@ -221,7 +221,7 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img) ALIGN(gpccs->code.offset, 256); p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256); p_img->data = (u32 *) - ((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) + + ((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr->falcon) + gpccs->boot.offset); p_img->data_size = ALIGN(p_img->desc->image_size, 256); p_img->fw_ver = NULL; @@ -931,7 +931,7 @@ int nvgpu_acr_prepare_ucode_blob_v1(struct gk20a *g) plsfm = &lsfm_l; (void) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v1)); - err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon); + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err); return err; diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 42c9f799f..8c885e71d 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -45,7 +45,7 @@ static void nvgpu_ecc_init(struct gk20a *g) int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, struct nvgpu_ecc_stat ***stat, const char *name) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct nvgpu_ecc_stat **stats; u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config); u32 gpc, tpc; @@ -91,7 +91,7 @@ int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g, int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g, struct nvgpu_ecc_stat **stat, const char *name) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct nvgpu_ecc_stat *stats; u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config); u32 gpc; @@ -196,7 +196,7 @@ int nvgpu_ecc_counter_init_per_fbpa(struct gk20a *g, void nvgpu_ecc_free(struct gk20a *g) { struct nvgpu_ecc *ecc = &g->ecc; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr->config); u32 i; diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 28f55e162..5e77de020 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -2786,7 +2786,7 @@ int nvgpu_channel_deferred_reset_engines(struct gk20a *g, return 0; } - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "failed to disable ctxsw"); goto fail; @@ -2821,7 +2821,7 @@ int nvgpu_channel_deferred_reset_engines(struct gk20a *g, nvgpu_mutex_release(&f->deferred_reset_mutex); clean_up: - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "failed to enable ctxsw"); } diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 8946b314a..c123730ac 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -595,7 +595,7 @@ static struct tsg_gk20a *gk20a_tsg_acquire_unused_tsg(struct fifo_gk20a *f) int nvgpu_tsg_open_common(struct gk20a *g, struct tsg_gk20a *tsg, pid_t pid) { - u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr->config); int err; /* we need to allocate this after g->ops.gr.init_fs_state() 
since diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index 9d2adc143..094c8c733 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -214,7 +214,7 @@ struct nvgpu_fecs_trace_record *nvgpu_gr_fecs_trace_get_record( struct gk20a *g, int idx) { struct nvgpu_mem *mem = nvgpu_gr_global_ctx_buffer_get_mem( - g->gr.global_ctx_buffer, + g->gr->global_ctx_buffer, NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER); if (mem == NULL) { return NULL; @@ -573,7 +573,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, pid, context_ptr, nvgpu_inst_block_addr(g, inst_block)); - mem = nvgpu_gr_global_ctx_buffer_get_mem(g->gr.global_ctx_buffer, + mem = nvgpu_gr_global_ctx_buffer_get_mem(g->gr->global_ctx_buffer, NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER); if (mem == NULL) { return -EINVAL; diff --git a/drivers/gpu/nvgpu/common/gr/fs_state.c b/drivers/gpu/nvgpu/common/gr/fs_state.c index 5a928fd8e..1925a18c8 100644 --- a/drivers/gpu/nvgpu/common/gr/fs_state.c +++ b/drivers/gpu/nvgpu/common/gr/fs_state.c @@ -84,7 +84,6 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config) u32 fuse_tpc_mask; u32 gpc_cnt, tpc_cnt, max_tpc_cnt; int err = 0; - struct nvgpu_gr_config *gr_config = config; nvgpu_log_fn(g, " "); @@ -94,39 +93,39 @@ int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config) } if (g->ops.gr.config.init_sm_id_table != NULL) { - err = g->ops.gr.config.init_sm_id_table(g, gr_config); + err = g->ops.gr.config.init_sm_id_table(g, config); if (err != 0) { return err; } /* Is table empty ? */ - if (nvgpu_gr_config_get_no_of_sm(gr_config) == 0U) { + if (nvgpu_gr_config_get_no_of_sm(config) == 0U) { return -EINVAL; } } - for (sm_id = 0; sm_id < nvgpu_gr_config_get_no_of_sm(gr_config); + for (sm_id = 0; sm_id < nvgpu_gr_config_get_no_of_sm(config); sm_id++) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(gr_config, sm_id); + nvgpu_gr_config_get_sm_info(config, sm_id); tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id, - gr_config); + config); } - g->ops.gr.init.pd_tpc_per_gpc(g, gr_config); + g->ops.gr.init.pd_tpc_per_gpc(g, config); /* gr__setup_pd_mapping */ - g->ops.gr.init.rop_mapping(g, gr_config); + g->ops.gr.init.rop_mapping(g, config); - g->ops.gr.init.pd_skip_table_gpc(g, gr_config); + g->ops.gr.init.pd_skip_table_gpc(g, config); - fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, gr_config, 0); - gpc_cnt = nvgpu_gr_config_get_gpc_count(gr_config); - tpc_cnt = nvgpu_gr_config_get_tpc_count(gr_config); - max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(gr_config); + fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, 0); + gpc_cnt = nvgpu_gr_config_get_gpc_count(config); + tpc_cnt = nvgpu_gr_config_get_tpc_count(config); + max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config); if ((g->tpc_fs_mask_user != 0U) && (fuse_tpc_mask == BIT32(max_tpc_cnt) - 1U)) { diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 4bc4bd0c5..e357bdb6f 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -38,7 +38,7 @@ static int gr_alloc_global_ctx_buffers(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err; u32 size; @@ -61,8 +61,8 @@ static int gr_alloc_global_ctx_buffers(struct gk20a *g) NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, size); size = 
g->ops.gr.init.get_global_attr_cb_size(g, - nvgpu_gr_config_get_tpc_count(g->gr.config), - nvgpu_gr_config_get_max_tpc_count(g->gr.config)); + nvgpu_gr_config_get_tpc_count(g->gr->config), + nvgpu_gr_config_get_max_tpc_count(g->gr->config)); nvgpu_log_info(g, "attr_buffer_size : %u", size); nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, @@ -120,7 +120,7 @@ u32 nvgpu_gr_tpc_offset(struct gk20a *g, u32 tpc) void nvgpu_gr_init(struct gk20a *g) { - nvgpu_cond_init(&g->gr.init_wq); + nvgpu_cond_init(&g->gr->init_wq); } int nvgpu_gr_suspend(struct gk20a *g) @@ -141,11 +141,11 @@ int nvgpu_gr_suspend(struct gk20a *g) g->ops.gr.intr.enable_interrupts(g, false); /* disable all exceptions */ - g->ops.gr.intr.enable_exceptions(g, g->gr.config, false); + g->ops.gr.intr.enable_exceptions(g, g->gr->config, false); nvgpu_gr_flush_channel_tlb(g); - g->gr.initialized = false; + g->gr->initialized = false; nvgpu_log_fn(g, "done"); return ret; @@ -154,16 +154,16 @@ int nvgpu_gr_suspend(struct gk20a *g) /* invalidate channel lookup tlb */ void nvgpu_gr_flush_channel_tlb(struct gk20a *g) { - nvgpu_spinlock_acquire(&g->gr.ch_tlb_lock); - (void) memset(g->gr.chid_tlb, 0, + nvgpu_spinlock_acquire(&g->gr->ch_tlb_lock); + (void) memset(g->gr->chid_tlb, 0, sizeof(struct gr_channel_map_tlb_entry) * GR_CHANNEL_MAP_TLB_SIZE); - nvgpu_spinlock_release(&g->gr.ch_tlb_lock); + nvgpu_spinlock_release(&g->gr->ch_tlb_lock); } static int gr_init_setup_hw(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err; nvgpu_log_fn(g, " "); @@ -249,9 +249,9 @@ out: return err; } -static void gr_remove_support(struct gr_gk20a *gr) +static void gr_remove_support(struct gk20a *g) { - struct gk20a *g = gr->g; + struct nvgpu_gr *gr = g->gr; nvgpu_log_fn(g, " "); @@ -281,7 +281,7 @@ static void gr_remove_support(struct gr_gk20a *gr) gr->ctx_vars.golden_image_initialized = false; } -static int gr_init_access_map(struct gk20a *g, struct gr_gk20a *gr) +static int gr_init_access_map(struct gk20a *g, struct nvgpu_gr *gr) { struct nvgpu_mem *mem; u32 nr_pages = @@ -317,7 +317,7 @@ static int gr_init_access_map(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static int gr_init_config(struct gk20a *g, struct gr_gk20a *gr) +static int gr_init_config(struct gk20a *g, struct nvgpu_gr *gr) { gr->config = nvgpu_gr_config_init(g); if (gr->config == NULL) { @@ -368,7 +368,7 @@ clean_up: static int gr_init_setup_sw(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = 0; nvgpu_log_fn(g, " "); @@ -381,7 +381,7 @@ static int gr_init_setup_sw(struct gk20a *g) gr->g = g; #if defined(CONFIG_GK20A_CYCLE_STATS) - err = nvgpu_mutex_init(&g->gr.cs_lock); + err = nvgpu_mutex_init(&g->gr->cs_lock); if (err != 0) { nvgpu_err(g, "Error in gr.cs_lock mutex initialization"); return err; @@ -389,7 +389,7 @@ static int gr_init_setup_sw(struct gk20a *g) #endif err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image, - g->gr.ctx_vars.golden_image_size); + g->gr->ctx_vars.golden_image_size); if (err != 0) { goto clean_up; } @@ -399,8 +399,8 @@ static int gr_init_setup_sw(struct gk20a *g) goto clean_up; } - err = nvgpu_gr_hwpm_map_init(g, &g->gr.hwpm_map, - g->gr.ctx_vars.pm_ctxsw_image_size); + err = nvgpu_gr_hwpm_map_init(g, &g->gr->hwpm_map, + g->gr->ctx_vars.pm_ctxsw_image_size); if (err != 0) { nvgpu_err(g, "hwpm_map init failed"); goto clean_up; @@ -411,7 +411,8 @@ static int gr_init_setup_sw(struct gk20a *g) goto clean_up; } - err = nvgpu_gr_zcull_init(g, &gr->zcull, gr->ctx_vars.zcull_image_size); 
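
Right here nvgpu_gr_zcull_init() picks up an explicit config argument instead of reading g->gr.config internally; the new definition appears in common/gr/zcull.c further down in this patch. A minimal sketch of the updated interface and call, assuming the matching prototype in the zcull header is updated the same way (that header hunk is not shown in this excerpt):

    int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull,
                            u32 size, struct nvgpu_gr_config *config);

    /* the caller passes the config it already holds */
    err = nvgpu_gr_zcull_init(g, &gr->zcull,
                              gr->ctx_vars.zcull_image_size, gr->config);
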
+ err = nvgpu_gr_zcull_init(g, &gr->zcull, + gr->ctx_vars.zcull_image_size, gr->config); if (err != 0) { goto clean_up; } @@ -466,7 +467,7 @@ static int gr_init_setup_sw(struct gk20a *g) clean_up: nvgpu_err(g, "fail"); - gr_remove_support(gr); + gr_remove_support(g); return err; } @@ -510,7 +511,7 @@ out: int nvgpu_gr_prepare_sw(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = 0; nvgpu_log_fn(g, " "); @@ -526,7 +527,6 @@ int nvgpu_gr_prepare_sw(struct gk20a *g) if (gr->falcon == NULL) { nvgpu_err(g, "failed to init gr falcon"); err = -ENOMEM; - return err; } } return err; @@ -570,9 +570,9 @@ int nvgpu_gr_reset(struct gk20a *g) { int err; struct nvgpu_mutex *fecs_mutex = - nvgpu_gr_falcon_get_fecs_mutex(g->gr.falcon); + nvgpu_gr_falcon_get_fecs_mutex(g->gr->falcon); - g->gr.initialized = false; + g->gr->initialized = false; nvgpu_mutex_acquire(fecs_mutex); @@ -588,7 +588,7 @@ int nvgpu_gr_reset(struct gk20a *g) return err; } - err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon); + err = nvgpu_gr_falcon_init_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_mutex_release(fecs_mutex); return err; @@ -615,8 +615,8 @@ int nvgpu_gr_reset(struct gk20a *g) nvgpu_cg_elcg_enable_no_wait(g); /* GR is inialized, signal possible waiters */ - g->gr.initialized = true; - nvgpu_cond_signal(&g->gr.init_wq); + g->gr->initialized = true; + nvgpu_cond_signal(&g->gr->init_wq); return err; } @@ -626,9 +626,9 @@ int nvgpu_gr_init_support(struct gk20a *g) nvgpu_log_fn(g, " "); - g->gr.initialized = false; + g->gr->initialized = false; - err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon); + err = nvgpu_gr_falcon_init_ctxsw(g, g->gr->falcon); if (err != 0) { return err; } @@ -660,8 +660,8 @@ int nvgpu_gr_init_support(struct gk20a *g) nvgpu_cg_elcg_enable_no_wait(g); /* GR is inialized, signal possible waiters */ - g->gr.initialized = true; - nvgpu_cond_signal(&g->gr.init_wq); + g->gr->initialized = true; + nvgpu_cond_signal(&g->gr->init_wq); return 0; } @@ -669,5 +669,33 @@ int nvgpu_gr_init_support(struct gk20a *g) /* Wait until GR is initialized */ void nvgpu_gr_wait_initialized(struct gk20a *g) { - NVGPU_COND_WAIT(&g->gr.init_wq, g->gr.initialized, 0U); + NVGPU_COND_WAIT(&g->gr->init_wq, g->gr->initialized, 0U); +} + +int nvgpu_gr_alloc(struct gk20a *g) +{ + struct nvgpu_gr *gr = NULL; + + /* if gr exists return */ + if ((g != NULL) && (g->gr != NULL)) { + return 0; + } + + /* Allocate memory for gr struct */ + gr = nvgpu_kzalloc(g, sizeof(*gr)); + if (gr == NULL) { + return -ENOMEM; + } + g->gr = gr; + + return 0; +} + +void nvgpu_gr_free(struct gk20a *g) +{ + /*Delete gr memory */ + if (g->gr != NULL) { + nvgpu_kfree(g, g->gr); + } + g->gr = NULL; } diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c index 4db639181..86aef7132 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -165,7 +165,7 @@ int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g) nvgpu_log_fn(g, " "); - if (!g->gr.ctx_vars.golden_image_initialized) { + if (!g->gr->ctx_vars.golden_image_initialized) { /* fecs init ramchain */ err = g->ops.gr.falcon.init_ctx_state(g); if (err != 0) { @@ -465,7 +465,7 @@ int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g, struct nvgpu_gr_falcon *falcon) { int err; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; nvgpu_log_fn(g, " "); diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c index c2c3ea70b..ea9a5ebfc 100644 --- 
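
The gr.c hunk above introduces nvgpu_gr_alloc()/nvgpu_gr_free() as the allocation and teardown pair for the now pointer-based g->gr. Their call sites are not part of this excerpt, so the probe/remove placement below is only an illustrative sketch with hypothetical function names:

    /* hypothetical early-init path */
    static int example_gpu_early_init(struct gk20a *g)
    {
            int err;

            err = nvgpu_gr_alloc(g);   /* returns 0 if g->gr already exists */
            if (err != 0) {
                    nvgpu_err(g, "couldn't allocate gr memory");
                    return err;
            }
            /* ... remaining sw init ... */
            return 0;
    }

    /* hypothetical teardown path */
    static void example_gpu_remove(struct gk20a *g)
    {
            /* ... remaining sw teardown ... */
            nvgpu_gr_free(g);          /* frees g->gr and resets it to NULL */
    }
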
a/drivers/gpu/nvgpu/common/gr/gr_intr.c +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -116,7 +116,7 @@ struct channel_gk20a *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid) { struct fifo_gk20a *f = &g->fifo; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 chid; u32 tsgid = NVGPU_INVALID_TSG_ID; u32 i; diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c index 87a18b90a..afc0aad33 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_setup.c +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -128,9 +128,9 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, tsg->vm = c->vm; nvgpu_vm_get(tsg->vm); - err = nvgpu_gr_obj_ctx_alloc(g, g->gr.golden_image, - g->gr.global_ctx_buffer, g->gr.gr_ctx_desc, - g->gr.config, gr_ctx, c->subctx, + err = nvgpu_gr_obj_ctx_alloc(g, g->gr->golden_image, + g->gr->global_ctx_buffer, g->gr->gr_ctx_desc, + g->gr->config, gr_ctx, c->subctx, tsg->vm, &c->inst_block, class_num, flags, c->cde, c->vpr); if (err != 0) { @@ -181,12 +181,12 @@ void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, if (gr_ctx != NULL) { if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) && - g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) { + g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close) { g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)); } - nvgpu_gr_ctx_free(g, gr_ctx, g->gr.global_ctx_buffer, vm); + nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm); } } @@ -251,8 +251,8 @@ int nvgpu_gr_setup_set_preemption_mode(struct channel_gk20a *ch, ch->tgid, graphics_preempt_mode, compute_preempt_mode); - err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, g->gr.config, - g->gr.gr_ctx_desc, gr_ctx, vm, class, + err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, g->gr->config, + g->gr->gr_ctx_desc, gr_ctx, vm, class, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); @@ -269,7 +269,7 @@ int nvgpu_gr_setup_set_preemption_mode(struct channel_gk20a *ch, goto enable_ch; } - nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, g->gr.config, gr_ctx, + nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, g->gr->config, gr_ctx, ch->subctx); err = nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); @@ -277,7 +277,7 @@ int nvgpu_gr_setup_set_preemption_mode(struct channel_gk20a *ch, nvgpu_err(g, "can't map patch context"); goto enable_ch; } - g->ops.gr.init.commit_global_cb_manager(g, g->gr.config, gr_ctx, + g->ops.gr.init.commit_global_cb_manager(g, g->gr->config, gr_ctx, true); nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); diff --git a/drivers/gpu/nvgpu/common/gr/hwpm_map.c b/drivers/gpu/nvgpu/common/gr/hwpm_map.c index 687fb6e28..dc3acff61 100644 --- a/drivers/gpu/nvgpu/common/gr/hwpm_map.c +++ b/drivers/gpu/nvgpu/common/gr/hwpm_map.c @@ -225,7 +225,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, struct ctxsw_buf_offset_map_entry *map, u32 *count, u32 *offset, u32 max_cnt) { - u32 num_gpcs = nvgpu_gr_config_get_gpc_count(g->gr.config); + u32 num_gpcs = nvgpu_gr_config_get_gpc_count(g->gr->config); u32 num_ppcs, num_tpcs, gpc_num, base; u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -235,7 +235,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) { - num_tpcs = 
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num); base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base; if (add_ctxsw_buffer_map_entries_subunits(map, nvgpu_netlist_get_pm_tpc_ctxsw_regs(g), @@ -245,7 +245,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, return -EINVAL; } - num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc_num); + num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(g->gr->config, gpc_num); base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base; if (add_ctxsw_buffer_map_entries_subunits(map, nvgpu_netlist_get_pm_ppc_ctxsw_regs(g), @@ -436,7 +436,7 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, /* Add entries from _LIST_nv_perf_fbp_ctx_regs */ if (add_ctxsw_buffer_map_entries_subunits(map, nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset, - hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, ~U32(0U), + hwpm_ctxsw_reg_count_max, 0, g->gr->num_fbps, ~U32(0U), g->ops.perf.get_pmm_per_chiplet_offset(), ~U32(0U)) != 0) { goto cleanup; @@ -446,7 +446,7 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, if (add_ctxsw_buffer_map_entries_subunits(map, nvgpu_netlist_get_fbp_router_ctxsw_regs(g), &count, &offset, hwpm_ctxsw_reg_count_max, 0, - g->gr.num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE, + g->gr->num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE, ~U32(0U)) != 0) { goto cleanup; } diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index 7fd62779f..d552476e6 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -114,12 +114,12 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, } if (g->ops.class.is_valid_gfx(class) && - g->gr.ctx_vars.force_preemption_gfxp) { + g->gr->ctx_vars.force_preemption_gfxp) { graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; } if (g->ops.class.is_valid_compute(class) && - g->gr.ctx_vars.force_preemption_cilp) { + g->gr->ctx_vars.force_preemption_cilp) { compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; } @@ -149,7 +149,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PREEMPT_CTXSW, - g->gr.ctx_vars.preempt_image_size); + g->gr->ctx_vars.preempt_image_size); nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_SPILL_CTXSW, spill_size); nvgpu_gr_ctx_set_size(gr_ctx_desc, @@ -264,7 +264,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, if (g->ops.gr.init.gfxp_wfi_timeout != NULL) { g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx, - g->gr.gfxp_wfi_timeout_count, true); + g->gr->gfxp_wfi_timeout_count, true); } if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) { @@ -446,8 +446,8 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, if (g->ops.gr.init.preemption_state != NULL) { err = g->ops.gr.init.preemption_state(g, - g->gr.gfxp_wfi_timeout_count, - g->gr.gfxp_wfi_timeout_unit_usec); + g->gr->gfxp_wfi_timeout_count, + g->gr->gfxp_wfi_timeout_unit_usec); if (err != 0) { goto clean_up; } @@ -526,7 +526,7 @@ restore_fe_go_idle: } golden_image->ready = true; - g->gr.ctx_vars.golden_image_initialized = true; + g->gr->ctx_vars.golden_image_initialized = true; g->ops.gr.falcon.set_current_ctx_invalid(g); diff --git a/drivers/gpu/nvgpu/common/gr/zcull.c b/drivers/gpu/nvgpu/common/gr/zcull.c index e52b4304a..5034cc851 100644 --- a/drivers/gpu/nvgpu/common/gr/zcull.c +++ b/drivers/gpu/nvgpu/common/gr/zcull.c @@ -31,9 +31,8 @@ #include 
"zcull_priv.h" int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull, - u32 size) + u32 size, struct nvgpu_gr_config *config) { - struct nvgpu_gr_config *gr_config = g->gr.config; struct nvgpu_gr_zcull *zcull; int err = 0; @@ -47,11 +46,11 @@ int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull, zcull->zcull_ctxsw_image_size = size; - zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(gr_config) * 16U; + zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(config) * 16U; zcull->aliquot_height = 16; zcull->width_align_pixels = - nvgpu_gr_config_get_tpc_count(gr_config) * 16U; + nvgpu_gr_config_get_tpc_count(config) * 16U; zcull->height_align_pixels = 32; zcull->aliquot_size = @@ -59,10 +58,10 @@ int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull, /* assume no floor sweeping since we only have 1 tpc in 1 gpc */ zcull->pixel_squares_by_aliquots = - nvgpu_gr_config_get_zcb_count(gr_config) * 16U * 16U * - nvgpu_gr_config_get_tpc_count(gr_config) / - (nvgpu_gr_config_get_gpc_count(gr_config) * - nvgpu_gr_config_get_gpc_tpc_count(gr_config, 0U)); + nvgpu_gr_config_get_zcb_count(config) * 16U * 16U * + nvgpu_gr_config_get_tpc_count(config) / + (nvgpu_gr_config_get_gpc_count(config) * + nvgpu_gr_config_get_gpc_tpc_count(config, 0U)); exit: *gr_zcull = zcull; diff --git a/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c b/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c index a85794fb6..b6207ec26 100644 --- a/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c +++ b/drivers/gpu/nvgpu/common/perf/cyclestats_snapshot.c @@ -85,7 +85,7 @@ void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done) * from locked context (protected by cs_lock) */ -static int css_gr_create_shared_data(struct gr_gk20a *gr) +static int css_gr_create_shared_data(struct nvgpu_gr *gr) { struct gk20a_cs_snapshot *data; @@ -108,7 +108,7 @@ int nvgpu_css_enable_snapshot(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *cs_client) { struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *data = gr->cs_data; u32 snapshot_size = cs_client->snapshot_size; int ret; @@ -161,7 +161,7 @@ failed_allocation: return ret; } -void nvgpu_css_disable_snapshot(struct gr_gk20a *gr) +void nvgpu_css_disable_snapshot(struct nvgpu_gr *gr) { struct gk20a *g = gr->g; struct gk20a_cs_snapshot *data = gr->cs_data; @@ -180,7 +180,7 @@ void nvgpu_css_disable_snapshot(struct gr_gk20a *gr) nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n"); } -static void css_gr_free_shared_data(struct gr_gk20a *gr) +static void css_gr_free_shared_data(struct nvgpu_gr *gr) { struct gk20a *g = gr->g; @@ -213,7 +213,7 @@ nvgpu_css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon) static int css_gr_flush_snapshots(struct channel_gk20a *ch) { struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *css = gr->cs_data; struct gk20a_cs_snapshot_client *cur; u32 pending, completed; @@ -471,7 +471,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, { int ret = 0; struct gk20a *g = ch->g; - struct gr_gk20a *gr; + struct nvgpu_gr *gr; /* we must have a placeholder to store pointer to client structure */ if (!cs_client) { @@ -485,7 +485,7 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch, nvgpu_speculation_barrier(); - gr = &g->gr; + gr = g->gr; nvgpu_mutex_acquire(&gr->cs_lock); @@ -539,13 +539,13 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch, 
{ int ret = 0; struct gk20a *g = ch->g; - struct gr_gk20a *gr; + struct nvgpu_gr *gr; if (!cs_client) { return -EINVAL; } - gr = &g->gr; + gr = g->gr; nvgpu_mutex_acquire(&gr->cs_lock); if (gr->cs_data) { struct gk20a_cs_snapshot *data = gr->cs_data; @@ -571,13 +571,13 @@ int gr_gk20a_css_flush(struct channel_gk20a *ch, { int ret = 0; struct gk20a *g = ch->g; - struct gr_gk20a *gr; + struct nvgpu_gr *gr; if (!cs_client) { return -EINVAL; } - gr = &g->gr; + gr = g->gr; nvgpu_mutex_acquire(&gr->cs_lock); ret = css_gr_flush_snapshots(ch); nvgpu_mutex_release(&gr->cs_lock); @@ -588,7 +588,7 @@ int gr_gk20a_css_flush(struct channel_gk20a *ch, /* helper function with locking to cleanup snapshot code code in gr_gk20a.c */ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; nvgpu_mutex_acquire(&gr->cs_lock); css_gr_free_shared_data(gr); @@ -600,7 +600,7 @@ int nvgpu_css_check_data_available(struct channel_gk20a *ch, u32 *pending, bool *hw_overflow) { struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *css = gr->cs_data; if (!css->hw_snapshot) { diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c index 7866d66ce..adbf7a8bf 100644 --- a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c +++ b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c @@ -246,7 +246,7 @@ static int pmu_enable_elpg_locked(struct gk20a *g, u8 pg_engine_id) int nvgpu_pmu_enable_elpg(struct gk20a *g) { struct nvgpu_pmu *pmu = &g->pmu; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u8 pg_engine_id; u32 pg_engine_id_list = 0; diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c index cbfea89d3..ce170e694 100644 --- a/drivers/gpu/nvgpu/common/rc/rc.c +++ b/drivers/gpu/nvgpu/common/rc/rc.c @@ -190,7 +190,7 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg, * changing until engine status is checked to make sure tsg * being recovered is not loaded on the engines */ - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { /* if failed to disable ctxsw, just abort tsg */ @@ -207,7 +207,7 @@ void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg, * By that time if tsg is not on the engine, engine need not * be reset. */ - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "failed to enable ctxsw"); } diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c index cd4c7324c..1b54738fb 100644 --- a/drivers/gpu/nvgpu/common/regops/regops.c +++ b/drivers/gpu/nvgpu/common/regops/regops.c @@ -66,7 +66,7 @@ static inline bool linear_search(u32 offset, const u32 *list, u64 size) * But note: while the dbg_gpu bind requires the a channel fd, * it doesn't require an allocated gr/compute obj at that point... 
*/ -static bool gr_context_info_available(struct gr_gk20a *gr) +static bool gr_context_info_available(struct nvgpu_gr *gr) { bool initialized; @@ -122,7 +122,7 @@ int exec_regops_gk20a(struct gk20a *g, /* be sure that ctx info is in place if there are ctx ops */ if ((ctx_wr_count | ctx_rd_count) != 0U) { - if (!gr_context_info_available(&g->gr)) { + if (!gr_context_info_available(g->gr)) { nvgpu_err(g, "gr context data not available"); return -ENODEV; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c index aed7bff4c..a95a03c7c 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c @@ -45,7 +45,7 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, { struct tegra_vgpu_cmd_msg msg = {0}; struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 golden_image_size; int err; @@ -178,7 +178,7 @@ int vgpu_gr_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, } pm_ctx->mem.gpu_va = nvgpu_vm_alloc_va(vm, - nvgpu_gr_hwpm_map_get_size(g->gr.hwpm_map), + nvgpu_gr_hwpm_map_get_size(g->gr->hwpm_map), GMMU_PAGE_SIZE_KERNEL); if (!pm_ctx->mem.gpu_va) { @@ -186,7 +186,7 @@ int vgpu_gr_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return -ENOMEM; } - pm_ctx->mem.size = nvgpu_gr_hwpm_map_get_size(g->gr.hwpm_map); + pm_ctx->mem.size = nvgpu_gr_hwpm_map_get_size(g->gr->hwpm_map); return 0; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index bca3cf30d..9fd085c23 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -139,21 +139,21 @@ int vgpu_gr_init_ctx_state(struct gk20a *g) nvgpu_log_fn(g, " "); - g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size; - g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size; - if (!g->gr.ctx_vars.golden_image_size || - !g->gr.ctx_vars.pm_ctxsw_image_size) { + g->gr->ctx_vars.golden_image_size = priv->constants.golden_ctx_size; + g->gr->ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size; + if (!g->gr->ctx_vars.golden_image_size || + !g->gr->ctx_vars.pm_ctxsw_image_size) { return -ENXIO; } - g->gr.ctx_vars.zcull_image_size = priv->constants.zcull_ctx_size; - if (g->gr.ctx_vars.zcull_image_size == 0U) { + g->gr->ctx_vars.zcull_image_size = priv->constants.zcull_ctx_size; + if (g->gr->ctx_vars.zcull_image_size == 0U) { return -ENXIO; } - g->gr.ctx_vars.preempt_image_size = + g->gr->ctx_vars.preempt_image_size = priv->constants.preempt_ctx_size; - if (!g->gr.ctx_vars.preempt_image_size) { + if (!g->gr->ctx_vars.preempt_image_size) { return -EINVAL; } @@ -162,7 +162,7 @@ int vgpu_gr_init_ctx_state(struct gk20a *g) int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 size; nvgpu_log_fn(g, " "); @@ -185,8 +185,8 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g) NVGPU_GR_GLOBAL_CTX_PAGEPOOL, size); size = g->ops.gr.init.get_global_attr_cb_size(g, - nvgpu_gr_config_get_tpc_count(g->gr.config), - nvgpu_gr_config_get_max_tpc_count(g->gr.config)); + nvgpu_gr_config_get_tpc_count(g->gr->config), + nvgpu_gr_config_get_max_tpc_count(g->gr->config)); nvgpu_log_info(g, "attr_buffer_size : %u", size); nvgpu_gr_global_ctx_set_size(gr->global_ctx_buffer, @@ -212,7 +212,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) { struct gk20a *g = c->g; struct nvgpu_gr_ctx 
*gr_ctx = NULL; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct tsg_gk20a *tsg = NULL; int err = 0; @@ -329,7 +329,7 @@ out: return err; } -static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) +static int vgpu_gr_init_gr_config(struct gk20a *g, struct nvgpu_gr *gr) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); struct nvgpu_gr_config *config; @@ -385,7 +385,7 @@ static int vgpu_gr_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) if (g->ops.gr.config.get_gpc_tpc_mask) { gr->config->gpc_tpc_mask[gpc_index] = g->ops.gr.config.get_gpc_tpc_mask(g, - g->gr.config, gpc_index); + g->gr->config, gpc_index); } } @@ -467,7 +467,7 @@ cleanup: return err; } -static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct gr_gk20a *gr, +static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct nvgpu_gr *gr, u32 size) { nvgpu_log_fn(g, " "); @@ -585,20 +585,20 @@ u32 *vgpu_gr_rop_l2_en_mask(struct gk20a *g) nvgpu_log_fn(g, " "); - if (g->gr.fbp_rop_l2_en_mask == NULL) { - g->gr.fbp_rop_l2_en_mask = + if (g->gr->fbp_rop_l2_en_mask == NULL) { + g->gr->fbp_rop_l2_en_mask = nvgpu_kzalloc(g, max_fbps_count * sizeof(u32)); - if (!g->gr.fbp_rop_l2_en_mask) { + if (!g->gr->fbp_rop_l2_en_mask) { return NULL; } } - g->gr.max_fbps_count = max_fbps_count; + g->gr->max_fbps_count = max_fbps_count; for (i = 0; i < max_fbps_count; i++) { - g->gr.fbp_rop_l2_en_mask[i] = priv->constants.l2_en_mask[i]; + g->gr->fbp_rop_l2_en_mask[i] = priv->constants.l2_en_mask[i]; } - return g->gr.fbp_rop_l2_en_mask; + return g->gr->fbp_rop_l2_en_mask; } int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, @@ -677,8 +677,10 @@ int vgpu_gr_query_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc, return 0; } -static void vgpu_remove_gr_support(struct gr_gk20a *gr) +static void vgpu_remove_gr_support(struct gk20a *g) { + struct nvgpu_gr *gr = g->gr; + nvgpu_log_fn(gr->g, " "); nvgpu_kfree(gr->g, gr->config->sm_to_cluster); @@ -694,7 +696,7 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err; nvgpu_log_fn(g, " "); @@ -707,7 +709,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) gr->g = g; #if defined(CONFIG_GK20A_CYCLE_STATS) - nvgpu_mutex_init(&g->gr.cs_lock); + nvgpu_mutex_init(&g->gr->cs_lock); #endif err = g->ops.gr.falcon.init_ctx_state(g); @@ -721,13 +723,13 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) } err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image, - g->gr.ctx_vars.golden_image_size); + g->gr->ctx_vars.golden_image_size); if (err != 0) { goto clean_up; } - err = nvgpu_gr_hwpm_map_init(g, &g->gr.hwpm_map, - g->gr.ctx_vars.pm_ctxsw_image_size); + err = nvgpu_gr_hwpm_map_init(g, &g->gr->hwpm_map, + g->gr->ctx_vars.pm_ctxsw_image_size); if (err != 0) { nvgpu_err(g, "hwpm_map init failed"); goto clean_up; @@ -759,7 +761,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) clean_up: nvgpu_err(g, "fail"); - vgpu_remove_gr_support(gr); + vgpu_remove_gr_support(g); return err; } @@ -1090,7 +1092,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g, { struct nvgpu_tsg_sm_error_state *sm_error_states; struct tsg_gk20a *tsg; - u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr->config); if (info->sm_id >= no_of_sm) { nvgpu_err(g, "invalid smd_id %d / %d", info->sm_id, no_of_sm); @@ -1183,7 +1185,7 @@ int vgpu_gr_init_fs_state(struct gk20a *g) return -EINVAL; } - return 
g->ops.gr.config.init_sm_id_table(g, g->gr.config); + return g->ops.gr.config.init_sm_id_table(g, g->gr->config); } int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable) @@ -1223,10 +1225,10 @@ void vgpu_gr_init_cyclestats(struct gk20a *g) /* cyclestats not supported on vgpu */ nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, false); - g->gr.max_css_buffer_size = vgpu_css_get_buffer_size(g); + g->gr->max_css_buffer_size = vgpu_css_get_buffer_size(g); /* snapshots not supported if the buffer size is 0 */ - if (g->gr.max_css_buffer_size == 0) { + if (g->gr->max_css_buffer_size == 0) { snapshots_supported = false; } @@ -1290,12 +1292,12 @@ static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g, int err = 0; if (g->ops.class.is_valid_gfx(class) && - g->gr.ctx_vars.force_preemption_gfxp) { + g->gr->ctx_vars.force_preemption_gfxp) { graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; } if (g->ops.class.is_valid_compute(class) && - g->gr.ctx_vars.force_preemption_cilp) { + g->gr->ctx_vars.force_preemption_cilp) { compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; } @@ -1318,29 +1320,29 @@ static int vgpu_gr_set_ctxsw_preemption_mode(struct gk20a *g, u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g); u32 attrib_cb_size = g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size, - nvgpu_gr_config_get_tpc_count(g->gr.config), - nvgpu_gr_config_get_max_tpc_count(g->gr.config)); + nvgpu_gr_config_get_tpc_count(g->gr->config), + nvgpu_gr_config_get_max_tpc_count(g->gr->config)); struct nvgpu_mem *desc; nvgpu_log_info(g, "gfxp context preempt size=%d", - g->gr.ctx_vars.preempt_image_size); + g->gr->ctx_vars.preempt_image_size); nvgpu_log_info(g, "gfxp context spill size=%d", spill_size); nvgpu_log_info(g, "gfxp context pagepool size=%d", pagepool_size); nvgpu_log_info(g, "gfxp context attrib cb size=%d", attrib_cb_size); - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, NVGPU_GR_CTX_PREEMPT_CTXSW, - g->gr.ctx_vars.preempt_image_size); - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + g->gr->ctx_vars.preempt_image_size); + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, NVGPU_GR_CTX_SPILL_CTXSW, spill_size); - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size); - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size); err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx, - g->gr.gr_ctx_desc, vm); + g->gr->gr_ctx_desc, vm); if (err != 0) { nvgpu_err(g, "cannot allocate ctxsw buffers"); goto fail; diff --git a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c index 9728457b6..0069c7626 100644 --- a/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/init/init_vgpu.c @@ -68,8 +68,8 @@ void vgpu_remove_support_common(struct gk20a *g) g->pmu.remove_support(&g->pmu); } - if (g->gr.remove_support) { - g->gr.remove_support(&g->gr); + if (g->gr->remove_support) { + g->gr->remove_support(g); } if (g->fifo.remove_support) { diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c index 6d1bc418f..3a21cba20 100644 --- a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.c @@ -80,7 +80,7 @@ u32 vgpu_css_get_buffer_size(struct gk20a *g) return size; } -static int 
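
The init_vgpu.c hunk above updates the remove_support invocation: since gr is no longer embedded in struct gk20a, the hook changes from taking struct gr_gk20a * to taking struct gk20a *, and implementations (gr_remove_support in common/gr/gr.c, vgpu_remove_gr_support in gr_vgpu.c) look the GR object up themselves. Minimal sketch of the pieces as they appear across this patch:

    /* member in struct nvgpu_gr (see the gr_gk20a.h hunk later in this patch) */
    void (*remove_support)(struct gk20a *g);

    /* implementations fetch gr from g instead of receiving it */
    static void gr_remove_support(struct gk20a *g)
    {
            struct nvgpu_gr *gr = g->gr;

            /* ... free per-unit state ... */
    }

    /* call site */
    if (g->gr->remove_support) {
            g->gr->remove_support(g);
    }
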
vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr) +static int vgpu_css_init_snapshot_buffer(struct nvgpu_gr *gr) { struct gk20a *g = gr->g; struct gk20a_cs_snapshot *data = gr->cs_data; @@ -126,7 +126,7 @@ fail: return err; } -void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr) +void vgpu_css_release_snapshot_buffer(struct nvgpu_gr *gr) { struct gk20a_cs_snapshot *data = gr->cs_data; struct gk20a *g = gr->g; @@ -150,7 +150,7 @@ int vgpu_css_flush_snapshots(struct channel_gk20a *ch, struct gk20a *g = ch->g; struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_channel_cyclestats_snapshot_params *p; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *data = gr->cs_data; int err; @@ -238,7 +238,7 @@ int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch, return ret; } - ret = vgpu_css_init_snapshot_buffer(&ch->g->gr); + ret = vgpu_css_init_snapshot_buffer(ch->g->gr); return ret; } diff --git a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h index f5e4a319d..f7cdaff99 100644 --- a/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/perf/cyclestats_snapshot_vgpu.h @@ -25,11 +25,11 @@ #include -struct gr_gk20a; +struct nvgpu_gr; struct channel_gk20a; struct gk20a_cs_snapshot_client; -void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr); +void vgpu_css_release_snapshot_buffer(struct nvgpu_gr *gr); int vgpu_css_flush_snapshots(struct channel_gk20a *ch, u32 *pending, bool *hw_overflow); int vgpu_css_detach(struct channel_gk20a *ch, diff --git a/drivers/gpu/nvgpu/common/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/vgpu/vgpu.c index 75b0c6822..7af0ea6d1 100644 --- a/drivers/gpu/nvgpu/common/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/vgpu.c @@ -237,8 +237,8 @@ void vgpu_remove_support_common(struct gk20a *g) g->pmu.remove_support(&g->pmu); } - if (g->gr.remove_support) { - g->gr.remove_support(&g->gr); + if (g->gr->remove_support) { + g->gr->remove_support(g); } if (g->fifo.remove_support) { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 96383d347..d2aa93cc2 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -186,12 +186,12 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, gr_ctx = tsg->gr_ctx; if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + nvgpu_gr_ctx_set_size(g->gr->gr_ctx_desc, NVGPU_GR_CTX_PM_CTX, - nvgpu_gr_hwpm_map_get_size(g->gr.hwpm_map)); + nvgpu_gr_hwpm_map_get_size(g->gr->hwpm_map)); ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, - g->gr.gr_ctx_desc, c->vm, + g->gr->gr_ctx_desc, c->vm, gpu_va); if (ret != 0) { nvgpu_err(g, @@ -562,7 +562,7 @@ int gk20a_gr_isr(struct gk20a *g) struct tsg_gk20a *tsg = NULL; u32 global_esr = 0; u32 chid; - struct nvgpu_gr_config *gr_config = g->gr.config; + struct nvgpu_gr_config *gr_config = g->gr->config; u32 gr_intr = g->ops.gr.intr.read_pending_interrupts(g, &intr_info); u32 clear_intr = gr_intr; @@ -856,7 +856,7 @@ int gr_gk20a_split_ppc_broadcast_addr(struct gk20a *g, u32 addr, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); for (ppc_num = 0; - ppc_num < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc_num); + ppc_num < nvgpu_gr_config_get_gpc_ppc_count(g->gr->config, gpc_num); ppc_num++) { priv_addr_table[(*t)++] = pri_ppc_addr(g, pri_ppccs_addr_mask(addr), gpc_num, ppc_num); @@ -883,6 +883,7 @@ int 
gr_gk20a_create_priv_addr_table(struct gk20a *g, u32 broadcast_flags; u32 t; int err; + struct nvgpu_gr_config *gr_config = g->gr->config; t = 0; *num_registers = 0; @@ -918,12 +919,12 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, * that we can look up the offsets. */ if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) { for (gpc_num = 0; - gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num < nvgpu_gr_config_get_gpc_count(gr_config); gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), @@ -943,7 +944,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_num)) { continue; } @@ -969,7 +970,7 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g, } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, pri_tpccs_addr_mask(addr), @@ -999,7 +1000,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, u32 *priv_registers; u32 num_registers = 0; int err = 0; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 potential_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * @@ -1013,7 +1014,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, return -EINVAL; } - if (!g->gr.ctx_vars.golden_image_initialized) { + if (!g->gr->ctx_vars.golden_image_initialized) { return -ENODEV; } @@ -1041,7 +1042,7 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, num_registers = 1; } - if (!g->gr.ctx_vars.golden_image_initialized) { + if (!g->gr->ctx_vars.golden_image_initialized) { nvgpu_log_fn(g, "no context switch header info to work with"); err = -EINVAL; goto cleanup; @@ -1052,9 +1053,9 @@ int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g, priv_registers[i], is_quad, quad, nvgpu_gr_obj_ctx_get_local_golden_image_ptr( - g->gr.golden_image), + g->gr->golden_image), nvgpu_gr_obj_ctx_get_golden_image_size( - g->gr.golden_image), + g->gr->golden_image), &priv_offset); if (err != 0) { nvgpu_log_fn(g, "Could not determine priv_offset for addr:0x%x", @@ -1086,7 +1087,7 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, u32 *priv_registers; u32 num_registers = 0; int err = 0; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 potential_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * @@ -1099,7 +1100,7 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, return -EINVAL; } - if (!g->gr.ctx_vars.golden_image_initialized) { + if (!g->gr->ctx_vars.golden_image_initialized) { return -ENODEV; } @@ -1124,14 +1125,14 @@ int gr_gk20a_get_pm_ctx_buffer_offsets(struct gk20a *g, num_registers = 1; } - if (!g->gr.ctx_vars.golden_image_initialized) 
{ + if (!g->gr->ctx_vars.golden_image_initialized) { nvgpu_log_fn(g, "no context switch header info to work with"); err = -EINVAL; goto cleanup; } for (i = 0; i < num_registers; i++) { - err = nvgpu_gr_hwmp_map_find_priv_offset(g, g->gr.hwpm_map, + err = nvgpu_gr_hwmp_map_find_priv_offset(g, g->gr->hwpm_map, priv_registers[i], &priv_offset); if (err != 0) { @@ -1197,7 +1198,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, u32 addr, u32 data, struct nvgpu_gr_ctx *gr_ctx) { - u32 num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config); + u32 num_gpc = nvgpu_gr_config_get_gpc_count(g->gr->config); u32 num_tpc; u32 tpc, gpc, reg; u32 chk_addr; @@ -1214,7 +1215,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, for (reg = 0; reg < num_ovr_perf_regs; reg++) { for (gpc = 0; gpc < num_gpc; gpc++) { - num_tpc = nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc); + num_tpc = nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc); for (tpc = 0; tpc < num_tpc; tpc++) { chk_addr = ((gpc_stride * gpc) + (tpc_in_gpc_stride * tpc) + @@ -1286,7 +1287,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, u32 marker_size = 0; u32 control_register_stride = 0; u32 perf_register_stride = 0; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE); u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); @@ -1934,7 +1935,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, bool pm_ctx_ready = false; struct nvgpu_mem *current_mem = NULL; u32 i, j, offset, v; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 max_offsets = nvgpu_gr_config_get_max_gpc_count(gr->config) * nvgpu_gr_config_get_max_tpc_per_gpc_count(gr->config) * @@ -2108,7 +2109,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, if ((current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) && (offsets[j] >= nvgpu_gr_obj_ctx_get_golden_image_size( - g->gr.golden_image))) { + g->gr->golden_image))) { continue; } if (pass == 0) { /* write pass */ @@ -2187,7 +2188,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, * at that point the hardware state can be inspected to * determine if the context we're interested in is current. */ - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); /* this should probably be ctx-fatal... 
*/ @@ -2204,7 +2205,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops, num_ctx_rd_ops, ch_is_curr_ctx); - tmp_err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + tmp_err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (tmp_err != 0) { nvgpu_err(g, "unable to restart ctxsw!"); err = tmp_err; @@ -2333,7 +2334,7 @@ void gk20a_gr_suspend_single_sm(struct gk20a *g, void gk20a_gr_suspend_all_sms(struct gk20a *g, u32 global_esr_mask, bool check_errors) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc, tpc, sm; int err; u32 dbgr_control0; @@ -2447,7 +2448,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, int err; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); - u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr->config); ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops)); if (ops == NULL) { @@ -2462,7 +2463,7 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g, if ((sms & BIT64(sm_id)) == 0ULL) { continue; } - sm_info = nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + sm_info = nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); @@ -2550,7 +2551,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); goto clean_up; @@ -2570,7 +2571,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g, nvgpu_mutex_release(&dbg_s->ch_list_lock); - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to restart ctxsw!"); } @@ -2595,7 +2596,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g, nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); goto clean_up; @@ -2611,7 +2612,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g, } } - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to restart ctxsw!"); } @@ -2645,7 +2646,7 @@ int gr_gk20a_trigger_suspend(struct gk20a *g) int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state) { int err = 0; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc, tpc, sm, sm_id; u32 global_mask; u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); @@ -2660,7 +2661,7 @@ int gr_gk20a_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state) /* Lock down all SMs */ for (sm_id = 0; sm_id < no_of_sm; sm_id++) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); sm = nvgpu_gr_config_get_sm_info_sm_index(sm_info); @@ -2701,7 +2702,7 @@ int gr_gk20a_clear_sm_errors(struct gk20a *g) { int ret = 0; u32 gpc, tpc, sm; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 global_esr; u32 sm_per_tpc = nvgpu_get_litter_value(g, 
GPU_LIT_NUM_SM_PER_TPC); @@ -2730,7 +2731,7 @@ int gr_gk20a_clear_sm_errors(struct gk20a *g) u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 sm_id, tpc_exception_en = 0; u32 offset, regval, tpc_offset, gpc_offset; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -2739,7 +2740,7 @@ u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g) for (sm_id = 0; sm_id < no_of_sm; sm_id++) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); tpc_offset = tpc_in_gpc_stride * nvgpu_gr_config_get_sm_info_tpc_index(sm_info); gpc_offset = gpc_stride * diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index e0885eed3..bcee6196a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -31,14 +31,12 @@ #define GK20A_TIMEOUT_FPGA 100000U /* 100 sec */ -struct tsg_gk20a; struct nvgpu_gr_ctx; struct channel_gk20a; struct nvgpu_warpstate; struct nvgpu_gr_ctx_desc; struct nvgpu_gr_falcon; struct nvgpu_gr_global_ctx_buffer_desc; -struct nvgpu_gr_global_ctx_local_golden_image; struct nvgpu_gr_zbc; struct nvgpu_gr_hwpm_map; struct nvgpu_gr_isr_data; @@ -74,12 +72,6 @@ struct gk20a_cs_snapshot_client; struct gk20a_cs_snapshot; #endif -struct gr_ctx_buffer_desc { - void (*destroy)(struct gk20a *g, struct gr_ctx_buffer_desc *desc); - struct nvgpu_mem mem; - void *priv; -}; - struct nvgpu_preemption_modes_rec { u32 graphics_preemption_mode_flags; /* supported preemption modes */ u32 compute_preemption_mode_flags; /* supported preemption modes */ @@ -88,7 +80,7 @@ struct nvgpu_preemption_modes_rec { u32 default_compute_preempt_mode; /* default mode */ }; -struct gr_gk20a { +struct nvgpu_gr { struct gk20a *g; struct { bool golden_image_initialized; @@ -138,7 +130,7 @@ struct gr_gk20a { u32 channel_tlb_flush_index; struct nvgpu_spinlock ch_tlb_lock; - void (*remove_support)(struct gr_gk20a *gr); + void (*remove_support)(struct gk20a *g); bool sw_ready; u32 fecs_feature_override_ecc_val; @@ -292,9 +284,4 @@ int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g, enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs, u32 reg_list_ppc_count, u32 *__offset_in_segment); -void gk20a_gr_destroy_ctx_buffer(struct gk20a *g, - struct gr_ctx_buffer_desc *desc); -int gk20a_gr_alloc_ctx_buffer(struct gk20a *g, - struct gr_ctx_buffer_desc *desc, size_t size); - #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 487637772..11999bca7 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -92,7 +92,7 @@ fail: void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; @@ -149,7 +149,7 @@ void gr_gm20b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) void gr_gm20b_set_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 cb_size = data * 4U; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -291,10 +291,10 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); - if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, 
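
The gr_gk20a.h hunk above carries the actual rename (struct gr_gk20a becomes struct nvgpu_gr) and drops the now-unused gr_ctx_buffer_desc helpers, so any header that forward-declared the old type switches to the new name, as the vgpu cyclestats header earlier in this patch already does:

    /* before */
    struct gr_gk20a;
    void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr);

    /* after */
    struct nvgpu_gr;
    void vgpu_css_release_snapshot_buffer(struct nvgpu_gr *gr);
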
gpc_index) == 0x1U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index) == 0x1U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x0); nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x1); - } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == + } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index) == 0x2U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(g, 0x0); @@ -344,7 +344,7 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) int gr_gm20b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gr_engine_id; struct nvgpu_engine_status_info engine_status; @@ -510,7 +510,7 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; unsigned long i; u32 tmp, max_fbps_count, max_ltc_per_fbp; unsigned long fbp_en_mask; @@ -535,7 +535,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g) #if defined(CONFIG_GK20A_CYCLE_STATS) nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_CYCLE_STATS_SNAPSHOT, true); - g->gr.max_css_buffer_size = 0xffffffffU; + g->gr->max_css_buffer_size = 0xffffffffU; #else (void)g; #endif @@ -545,7 +545,7 @@ void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) { /* Check if we have at least one valid warp */ /* get paused state on maxwell */ - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc, tpc, sm_id; u32 tpc_offset, gpc_offset, reg_offset; u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; @@ -695,7 +695,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g, (void) memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states)); - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); goto fail; @@ -703,7 +703,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g, if (gk20a_is_channel_ctx_resident(ch)) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); @@ -716,7 +716,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g, 0); } - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); fail: nvgpu_mutex_release(&g->dbg_sessions_lock); diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 2acc6565c..41ceaf8a6 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -341,7 +341,7 @@ fail: void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; @@ -396,7 +396,7 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 cb_size_steady = data * 4U, cb_size; u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); @@ -472,7 +472,7 @@ void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data) int 
gr_gp10b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gr_engine_id; struct nvgpu_engine_status_info engine_status; @@ -609,9 +609,9 @@ void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) nvgpu_tegra_fuse_write_bypass(g, 0x1); nvgpu_tegra_fuse_write_access_sw(g, 0x0); - if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0x1U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index) == 0x1U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x2); - } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == + } else if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index) == 0x2U) { nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(g, 0x1); } else { @@ -709,7 +709,7 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, /* set cilp_preempt_pending = true and record the channel */ nvgpu_gr_ctx_set_cilp_preempt_pending(gr_ctx, true); - g->gr.cilp_preempt_pending_chid = fault_ch->chid; + g->gr->cilp_preempt_pending_chid = fault_ch->chid; g->ops.tsg.post_event_id(tsg, NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED); @@ -741,7 +741,7 @@ static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, } nvgpu_gr_ctx_set_cilp_preempt_pending(gr_ctx, false); - g->gr.cilp_preempt_pending_chid = FIFO_INVAL_CHANNEL_ID; + g->gr->cilp_preempt_pending_chid = FIFO_INVAL_CHANNEL_ID; return 0; } @@ -871,7 +871,7 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, u32 *__chid) u32 chid; int ret = -EINVAL; - chid = g->gr.cilp_preempt_pending_chid; + chid = g->gr->cilp_preempt_pending_chid; if (chid == FIFO_INVAL_CHANNEL_ID) { return ret; } @@ -1053,7 +1053,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, nvgpu_mutex_acquire(&g->dbg_sessions_lock); - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); nvgpu_mutex_release(&g->dbg_sessions_lock); @@ -1078,7 +1078,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, nvgpu_mutex_release(&dbg_s->ch_list_lock); - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_mutex_release(&g->dbg_sessions_lock); goto clean_up; @@ -1189,7 +1189,7 @@ int gr_gp10b_get_preemption_mode_flags(struct gk20a *g, void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; gr->gfxp_wfi_timeout_count = GFXP_WFI_TIMEOUT_COUNT_DEFAULT; } diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index bc0c6221b..f6f9943b7 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -164,8 +164,8 @@ void gr_gv100_init_hwpm_pmm_register(struct gk20a *g) g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0), 0xFFFFFFFFU, 1U, num_sys_perfmon); g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0), - 0xFFFFFFFFU, g->gr.num_fbps, num_fbp_perfmon); + 0xFFFFFFFFU, g->gr->num_fbps, num_fbp_perfmon); g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0), - 0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr.config), + 0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr->config), num_gpc_perfmon); } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 5526edb7b..76a024a0a 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -821,7 +821,7 @@ fail: void 
gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 pd_ab_max_output; u32 alpha_cb_size = data * 4U; @@ -873,7 +873,7 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data) void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index, ppc_index, stride, val; u32 cb_size_steady = data * 4U, cb_size; u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g, @@ -1009,11 +1009,11 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g, gk20a_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r())); sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr->config); gpc++) { gpc_offset = nvgpu_gr_gpc_offset(g, gpc); for (tpc = 0; - tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc); + tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc); tpc++) { tpc_offset = nvgpu_gr_tpc_offset(g, tpc); @@ -1031,7 +1031,7 @@ static void gr_gv11b_dump_gr_sm_regs(struct gk20a *g, int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gr_engine_id; struct nvgpu_engine_status_info engine_status; @@ -1174,18 +1174,18 @@ void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { u32 fuse_val; - if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index) == 0U) { + if (nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index) == 0U) { return; } /* - * For s/w value nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index), bit value 1 indicates + * For s/w value nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index), bit value 1 indicates * corresponding TPC is enabled. But for h/w fuse register, bit value 1 * indicates corresponding TPC is disabled. 
* So we need to flip the bits and ensure we don't write to bits greater * than TPC count */ - fuse_val = nvgpu_gr_config_get_gpc_tpc_mask(g->gr.config, gpc_index); + fuse_val = nvgpu_gr_config_get_gpc_tpc_mask(g->gr->config, gpc_index); fuse_val = ~fuse_val; fuse_val = fuse_val & 0xfU; /* tpc0_disable fuse is only 4-bit wide */ @@ -1666,7 +1666,7 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) /* Check if we have at least one valid warp * get paused state on maxwell */ - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc, tpc, sm, sm_id; u32 offset; u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0; @@ -1737,7 +1737,7 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, { struct nvgpu_dbg_reg_op *ops; unsigned int i = 0, sm_id; - u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr->config); int err; ops = nvgpu_kcalloc(g, no_of_sm, sizeof(*ops)); @@ -1753,13 +1753,13 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, continue; } - sm_info = nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + sm_info = nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); if (g->ops.gr.init.get_nonpes_aware_tpc != NULL) { tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, nvgpu_gr_config_get_sm_info_gpc_index(sm_info), nvgpu_gr_config_get_sm_info_tpc_index(sm_info), - g->gr.config); + g->gr->config); } else { tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); } @@ -1968,7 +1968,7 @@ void gv11b_gr_suspend_single_sm(struct gk20a *g, void gv11b_gr_suspend_all_sms(struct gk20a *g, u32 global_esr_mask, bool check_errors) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc, tpc, sm; int err; u32 dbgr_control0; @@ -2716,13 +2716,13 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, if ((broadcast_flags & PRI_BROADCAST_FLAGS_EGPC) != 0U) { nvgpu_log_info(g, "broadcast flags egpc"); for (gpc_num = 0; - gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr->config); gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { nvgpu_log_info(g, "broadcast flags etpc"); for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num); tpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) != 0U) { @@ -2752,7 +2752,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num)) { continue; } @@ -2766,7 +2766,7 @@ void gv11b_gr_egpc_etpc_priv_addr_table(struct gk20a *g, u32 addr, if ((broadcast_flags & PRI_BROADCAST_FLAGS_ETPC) != 0U) { nvgpu_log_info(g, "broadcast flags etpc but not egpc"); for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num); tpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_SMPC) != 0U) { @@ -2804,14 +2804,14 @@ u32 gv11b_gr_get_egpc_base(struct gk20a *g) void gr_gv11b_init_gfxp_wfi_timeout_count(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; gr->gfxp_wfi_timeout_unit_usec = true; gr->gfxp_wfi_timeout_count = GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT; } unsigned 
long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g) { - if (g->gr.gfxp_wfi_timeout_unit_usec) { + if (g->gr->gfxp_wfi_timeout_unit_usec) { /* 100 msec in usec count */ return (100UL * 1000UL); } else { @@ -2963,7 +2963,7 @@ static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g, u32 fbp_num = 0; u32 base = 0; - for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) { + for (fbp_num = 0; fbp_num < g->gr->num_fbps; fbp_num++) { base = perf_pmmfbp_base_v() + (fbp_num * g->ops.perf.get_pmm_per_chiplet_offset()); @@ -3028,12 +3028,12 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, */ if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) != 0U) { for (gpc_num = 0; - gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr->config); gpc_num++) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, @@ -3055,7 +3055,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, gpc_addr = pri_gpccs_addr_mask(priv_addr); tpc_num = g->ops.gr.get_tpc_num(g, gpc_addr); - if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num)) { + if (tpc_num >= nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num)) { continue; } @@ -3094,7 +3094,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, } for (gpc_num = 0; - gpc_num < nvgpu_gr_config_get_gpc_count(g->gr.config); + gpc_num < nvgpu_gr_config_get_gpc_count(g->gr->config); gpc_num++) { for (domain_idx = pmm_domain_start; domain_idx < (pmm_domain_start + num_domains); @@ -3148,7 +3148,7 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, } else if ((broadcast_flags & PRI_BROADCAST_FLAGS_GPC) == 0U) { if ((broadcast_flags & PRI_BROADCAST_FLAGS_TPC) != 0U) { for (tpc_num = 0; - tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc_num); + tpc_num < nvgpu_gr_config_get_gpc_tpc_count(g->gr->config, gpc_num); tpc_num++) { priv_addr_table[t++] = pri_tpc_addr(g, @@ -3185,7 +3185,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g, (void)memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states)); - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err != 0) { nvgpu_err(g, "unable to stop gr ctxsw"); goto fail; @@ -3193,14 +3193,14 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g, if (gk20a_is_channel_ctx_resident(ch)) { struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + nvgpu_gr_config_get_sm_info(g->gr->config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); if (g->ops.gr.init.get_nonpes_aware_tpc != NULL) { tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, nvgpu_gr_config_get_sm_info_gpc_index(sm_info), nvgpu_gr_config_get_sm_info_tpc_index(sm_info), - g->gr.config); + g->gr->config); } else { tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); } @@ -3217,7 +3217,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g, 0); } - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); fail: nvgpu_mutex_release(&g->dbg_sessions_lock); diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c index 660c40dc3..140f461ec 100644 --- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c +++ 
b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c @@ -667,10 +667,10 @@ int gm20b_gr_falcon_init_ctx_state(struct gk20a *g) nvgpu_log_fn(g, " "); /* query ctxsw image sizes, if golden context is not created */ - if (!g->gr.ctx_vars.golden_image_initialized) { + if (!g->gr->ctx_vars.golden_image_initialized) { ret = gm20b_gr_falcon_ctrl_ctxsw(g, NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_IMAGE_SIZE, - 0, &g->gr.ctx_vars.golden_image_size); + 0, &g->gr->ctx_vars.golden_image_size); if (ret != 0) { nvgpu_err(g, "query golden image size failed"); @@ -678,7 +678,7 @@ int gm20b_gr_falcon_init_ctx_state(struct gk20a *g) } ret = gm20b_gr_falcon_ctrl_ctxsw(g, NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_PM_IMAGE_SIZE, - 0, &g->gr.ctx_vars.pm_ctxsw_image_size); + 0, &g->gr->ctx_vars.pm_ctxsw_image_size); if (ret != 0) { nvgpu_err(g, "query pm ctx image size failed"); @@ -686,7 +686,7 @@ int gm20b_gr_falcon_init_ctx_state(struct gk20a *g) } ret = gm20b_gr_falcon_ctrl_ctxsw(g, NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_ZCULL_IMAGE_SIZE, - 0, &g->gr.ctx_vars.zcull_image_size); + 0, &g->gr->ctx_vars.zcull_image_size); if (ret != 0) { nvgpu_err(g, "query zcull ctx image size failed"); @@ -744,7 +744,7 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g, bool sleepduringwait) { int ret; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; nvgpu_mutex_acquire(&gr->falcon->fecs_mutex); @@ -786,7 +786,7 @@ int gm20b_gr_falcon_submit_fecs_sideband_method_op(struct gk20a *g, struct nvgpu_fecs_method_op op) { int ret; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; nvgpu_mutex_acquire(&gr->falcon->fecs_mutex); diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b.c index 2bf4c03d0..91d77608c 100644 --- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b.c @@ -40,10 +40,10 @@ int gp10b_gr_falcon_init_ctx_state(struct gk20a *g) return err; } - if (g->gr.ctx_vars.preempt_image_size == 0U) { + if (g->gr->ctx_vars.preempt_image_size == 0U) { err = g->ops.gr.falcon.ctrl_ctxsw(g, NVGPU_GR_FALCON_METHOD_PREEMPT_IMAGE_SIZE, 0U, - &g->gr.ctx_vars.preempt_image_size); + &g->gr->ctx_vars.preempt_image_size); if (err != 0) { nvgpu_err(g, "query preempt image size failed"); return err; @@ -51,7 +51,7 @@ int gp10b_gr_falcon_init_ctx_state(struct gk20a *g) } nvgpu_log_info(g, "preempt image size: %u", - g->gr.ctx_vars.preempt_image_size); + g->gr->ctx_vars.preempt_image_size); nvgpu_log_fn(g, "done"); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c index e70ad10f7..6f669c51f 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c @@ -215,10 +215,10 @@ int gp10b_gr_init_fs_state(struct gk20a *g) gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); - if (g->gr.fecs_feature_override_ecc_val != 0U) { + if (g->gr->fecs_feature_override_ecc_val != 0U) { nvgpu_writel(g, gr_fecs_feature_override_ecc_r(), - g->gr.fecs_feature_override_ecc_val); + g->gr->fecs_feature_override_ecc_val); } gm20b_gr_init_fs_state(g); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 5ac643011..05ad913f5 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -593,10 +593,10 @@ int gv11b_gr_init_fs_state(struct gk20a *g) 
gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f()); nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), data); - if (g->gr.fecs_feature_override_ecc_val != 0U) { + if (g->gr->fecs_feature_override_ecc_val != 0U) { nvgpu_writel(g, gr_fecs_feature_override_ecc_r(), - g->gr.fecs_feature_override_ecc_val); + g->gr->fecs_feature_override_ecc_val); } data = nvgpu_readl(g, gr_debug_0_r()); diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c index 45fee58b6..bd9aceba6 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c @@ -177,7 +177,7 @@ int gm20b_determine_L2_size_bytes(struct gk20a *g) sets = 0U; } - active_ltcs = g->gr.num_fbps; + active_ltcs = g->gr->num_fbps; /* chip-specific values */ lts_per_ltc = 2U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/cyclestats_snapshot.h b/drivers/gpu/nvgpu/include/nvgpu/cyclestats_snapshot.h index 4b421aa92..d0c5d1bb7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/cyclestats_snapshot.h +++ b/drivers/gpu/nvgpu/include/nvgpu/cyclestats_snapshot.h @@ -34,7 +34,7 @@ #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) struct gk20a; -struct gr_gk20a; +struct nvgpu_gr; struct channel_gk20a; /* cycle stats fifo header (must match NvSnapshotBufferFifo) */ @@ -139,7 +139,7 @@ u32 nvgpu_css_get_pending_snapshots(struct gk20a *g); void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done); int nvgpu_css_enable_snapshot(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *cs_client); -void nvgpu_css_disable_snapshot(struct gr_gk20a *gr); +void nvgpu_css_disable_snapshot(struct nvgpu_gr *gr); u32 nvgpu_css_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, u32 count); u32 nvgpu_css_release_perfmon_ids(struct gk20a_cs_snapshot *data, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index f2146e89e..a14c25780 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -27,7 +27,7 @@ struct gk20a; struct fifo_gk20a; struct channel_gk20a; -struct gr_gk20a; +struct nvgpu_gr; struct sim_nvgpu; struct gk20a_ce_app; struct gk20a_ctxsw_trace; @@ -1703,7 +1703,7 @@ struct gpu_ops { struct { int (*enable_snapshot)(struct channel_gk20a *ch, struct gk20a_cs_snapshot_client *client); - void (*disable_snapshot)(struct gr_gk20a *gr); + void (*disable_snapshot)(struct nvgpu_gr *gr); int (*check_data_available)(struct channel_gk20a *ch, u32 *pending, bool *hw_overflow); @@ -2036,7 +2036,7 @@ struct gk20a { struct clk_gk20a clk; struct fifo_gk20a fifo; struct nvgpu_nvlink_dev nvlink; - struct gr_gk20a gr; + struct nvgpu_gr *gr; struct sim_nvgpu *sim; struct mm_gk20a mm; struct nvgpu_pmu pmu; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h index 2e62f30e1..6528952fb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h @@ -35,5 +35,7 @@ int nvgpu_gr_suspend(struct gk20a *g); void nvgpu_gr_flush_channel_tlb(struct gk20a *g); void nvgpu_gr_wait_initialized(struct gk20a *g); void nvgpu_gr_init(struct gk20a *g); +int nvgpu_gr_alloc(struct gk20a *g); +void nvgpu_gr_free(struct gk20a *g); #endif /* NVGPU_GR_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h index de117a9b5..b3e21c7eb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h @@ -45,7 +45,7 @@ struct nvgpu_gr_zcull_info { }; int nvgpu_gr_zcull_init(struct gk20a *g, struct 
nvgpu_gr_zcull **gr_zcull, - u32 size); + u32 size, struct nvgpu_gr_config *gr_config); void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull); u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c index d1b832087..e40b82579 100644 --- a/drivers/gpu/nvgpu/os/linux/debug.c +++ b/drivers/gpu/nvgpu/os/linux/debug.c @@ -413,17 +413,17 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) l->debugfs_force_preemption_gfxp = debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.ctx_vars.force_preemption_gfxp); + &g->gr->ctx_vars.force_preemption_gfxp); l->debugfs_force_preemption_cilp = debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.ctx_vars.force_preemption_cilp); + &g->gr->ctx_vars.force_preemption_cilp); l->debugfs_dump_ctxsw_stats = debugfs_create_bool("dump_ctxsw_stats_on_channel_close", S_IRUGO|S_IWUSR, l->debugfs, - &g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close); + &g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close); gr_gk20a_debugfs_init(g); gk20a_pmu_debugfs_init(g); diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index 59b707022..d54236758 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -309,6 +309,9 @@ static void nvgpu_free_gk20a(struct gk20a *g) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + /* free gr memory */ + nvgpu_gr_free(g); + kfree(l); } diff --git a/drivers/gpu/nvgpu/os/linux/fecs_trace_linux.c b/drivers/gpu/nvgpu/os/linux/fecs_trace_linux.c index 65114f272..184e91e81 100644 --- a/drivers/gpu/nvgpu/os/linux/fecs_trace_linux.c +++ b/drivers/gpu/nvgpu/os/linux/fecs_trace_linux.c @@ -604,7 +604,7 @@ int gk20a_ctxsw_trace_init(struct gk20a *g) if (unlikely(err)) goto fail; - g->gr.max_ctxsw_ring_buffer_size = GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE; + g->gr->max_ctxsw_ring_buffer_size = GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE; return 0; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 01b00aa70..61347031c 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -757,9 +757,8 @@ static int gk20a_channel_zcull_bind(struct channel_gk20a *ch, struct nvgpu_zcull_bind_args *args) { struct gk20a *g = ch->g; - struct gr_gk20a *gr = &g->gr; - nvgpu_log_fn(gr->g, " "); + nvgpu_log_fn(g, " "); return g->ops.gr.setup.bind_ctxsw_zcull(g, ch, args->gpu_va, args->mode); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index a733ab2e5..9986b9a50 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -285,11 +285,11 @@ gk20a_ctrl_ioctl_gpu_characteristics( gpu.L2_cache_size = g->ops.ltc.determine_L2_size_bytes(g); gpu.on_board_video_memory_size = 0; /* integrated GPU */ - gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr.config); - gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr.config); - gpu.gpc_mask = nvgpu_gr_config_get_gpc_mask(g->gr.config); + gpu.num_gpc = nvgpu_gr_config_get_gpc_count(g->gr->config); + gpu.max_gpc_count = nvgpu_gr_config_get_max_gpc_count(g->gr->config); + gpu.gpc_mask = nvgpu_gr_config_get_gpc_mask(g->gr->config); - gpu.num_tpc_per_gpc = nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); + gpu.num_tpc_per_gpc = 
nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr->config); gpu.bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */ @@ -324,8 +324,8 @@ gk20a_ctrl_ioctl_gpu_characteristics( gpu.sm_arch_spa_version = g->params.sm_arch_spa_version; gpu.sm_arch_warp_count = g->params.sm_arch_warp_count; - gpu.max_css_buffer_size = g->gr.max_css_buffer_size; - gpu.max_ctxsw_ring_buffer_size = g->gr.max_ctxsw_ring_buffer_size; + gpu.max_css_buffer_size = g->gr->max_css_buffer_size; + gpu.max_ctxsw_ring_buffer_size = g->gr->max_ctxsw_ring_buffer_size; gpu.gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST; gpu.tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST; @@ -550,7 +550,7 @@ clean_up: static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, struct nvgpu_gpu_get_tpc_masks_args *args) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = 0; const u32 gpc_tpc_mask_size = sizeof(u32) * nvgpu_gr_config_get_max_gpc_count(gr->config); @@ -577,7 +577,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, static int gk20a_ctrl_get_fbp_l2_masks( struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = 0; const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count; @@ -689,8 +689,8 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, struct nvgpu_warpstate *w_state = NULL; u32 sm_count, ioctl_size, size, sm_id, no_of_sm; - sm_count = nvgpu_gr_config_get_gpc_count(g->gr.config) * - nvgpu_gr_config_get_tpc_count(g->gr.config); + sm_count = nvgpu_gr_config_get_gpc_count(g->gr->config) * + nvgpu_gr_config_get_tpc_count(g->gr->config); ioctl_size = sm_count * sizeof(struct warpstate); ioctl_w_state = nvgpu_kzalloc(g, ioctl_size); @@ -711,7 +711,7 @@ static int nvgpu_gpu_ioctl_wait_for_pause(struct gk20a *g, nvgpu_mutex_acquire(&g->dbg_sessions_lock); g->ops.gr.wait_for_pause(g, w_state); - no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr->config); for (sm_id = 0; sm_id < no_of_sm; sm_id++) { ioctl_w_state[sm_id].valid_warps[0] = @@ -795,7 +795,7 @@ static int nvgpu_gpu_ioctl_has_any_exception( static int gk20a_ctrl_get_num_vsms(struct gk20a *g, struct nvgpu_gpu_num_vsms *args) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; args->num_vsms = nvgpu_gr_config_get_no_of_sm(gr->config); return 0; } @@ -804,7 +804,7 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g, struct nvgpu_gpu_vsms_mapping *args) { int err = 0; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr->config); size_t write_size = no_of_sm * sizeof(struct nvgpu_gpu_vsms_mapping_entry); @@ -1677,7 +1677,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; - get_ctx_size_args->size = nvgpu_gr_get_ctxsw_zcull_size(g, g->gr.zcull); + get_ctx_size_args->size = nvgpu_gr_get_ctxsw_zcull_size(g, g->gr->zcull); break; case NVGPU_GPU_IOCTL_ZCULL_GET_INFO: @@ -1690,8 +1690,8 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg if (zcull_info == NULL) return -ENOMEM; - err = g->ops.gr.zcull.get_zcull_info(g, g->gr.config, - g->gr.zcull, zcull_info); + err = g->ops.gr.zcull.get_zcull_info(g, g->gr->config, + g->gr->zcull, zcull_info); if (err) { nvgpu_kfree(g, zcull_info); break; @@ -1742,7 +1742,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long 
arg if (!err) { err = gk20a_busy(g); if (!err) { - err = g->ops.gr.zbc.set_table(g, g->gr.zbc, + err = g->ops.gr.zbc.set_table(g, g->gr->zbc, zbc_val); gk20a_idle(g); } @@ -1761,7 +1761,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg zbc_tbl->type = query_table_args->type; zbc_tbl->index_size = query_table_args->index_size; - err = g->ops.gr.zbc.query_table(g, g->gr.zbc, zbc_tbl); + err = g->ops.gr.zbc.query_table(g, g->gr->zbc, zbc_tbl); if (!err) { switch (zbc_tbl->type) { diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index d3ac492b7..900f6276f 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -266,7 +266,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args) { struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct nvgpu_tsg_sm_error_state *sm_error_state; struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record; struct channel_gk20a *ch; @@ -1101,7 +1101,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( nvgpu_mutex_acquire(&g->dbg_sessions_lock); /* Suspend GPU context switching */ - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr->falcon); if (err) { nvgpu_err(g, "unable to stop gr ctxsw"); /* this should probably be ctx-fatal... */ @@ -1119,7 +1119,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( break; } - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr->falcon); if (err) nvgpu_err(g, "unable to restart ctxsw!"); @@ -1451,7 +1451,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state( struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args) { struct gk20a *g = dbg_s->g; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 sm_id; struct channel_gk20a *ch; int err = 0; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index ec47767d7..f3fbe77ba 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -62,7 +62,7 @@ static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g, struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct gk20a_sched_ctrl *sched = &l->sched_ctrl; struct channel_gk20a *ch; - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = 0; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid); @@ -558,7 +558,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, struct tsg_gk20a *tsg, struct nvgpu_tsg_read_single_sm_error_state_args *args) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; struct nvgpu_tsg_sm_error_state *sm_error_state; struct nvgpu_tsg_sm_error_state_record sm_error_state_record; u32 sm_id; diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 66f58bf35..329819496 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "platform_gk20a.h" #include "sysfs.h" @@ -755,8 +756,8 @@ void gk20a_remove_support(struct gk20a *g) g->sec2.remove_support(&g->sec2); } - if (g->gr.remove_support) - g->gr.remove_support(&g->gr); + if (g->gr->remove_support) + g->gr->remove_support(g); if (g->mm.remove_ce_support) g->mm.remove_ce_support(&g->mm); @@ -824,6 +825,12 @@ static int 
gk20a_init_support(struct platform_device *pdev) if (err) goto fail_sim; + err = nvgpu_gr_alloc(g); + if (err != 0) { + nvgpu_err(g, "couldn't allocate gr memory"); + goto fail_sim; + } + nvgpu_init_usermode_support(g); return 0; @@ -1221,7 +1228,7 @@ void gk20a_driver_start_unload(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); /* GR SW ready needs to be invalidated at this time with the busy lock * held to prevent a racing condition on the gr/mm code */ - g->gr.sw_ready = false; + g->gr->sw_ready = false; g->sw_ready = false; up_write(&l->busy_lock); @@ -1271,7 +1278,7 @@ static int nvgpu_read_fuse_overrides(struct gk20a *g) g->tpc_fs_mask_user = ~value; break; case GP10B_FUSE_OPT_ECC_EN: - g->gr.fecs_feature_override_ecc_val = value; + g->gr->fecs_feature_override_ecc_val = value; break; default: nvgpu_err(g, "ignore unknown fuse override %08x", fuse); @@ -1412,7 +1419,7 @@ return_err: * Last since the above allocs may use data structures in here. */ nvgpu_kmem_fini(gk20a, NVGPU_KMEM_FINI_FORCE_CLEANUP); - + nvgpu_gr_free(gk20a); kfree(l); return err; diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c index d28787f7e..4a1a62927 100644 --- a/drivers/gpu/nvgpu/os/linux/pci.c +++ b/drivers/gpu/nvgpu/os/linux/pci.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "nvlink.h" #include "module.h" @@ -411,6 +412,12 @@ static int nvgpu_pci_init_support(struct pci_dev *pdev) if (err) goto fail_sim; + err = nvgpu_gr_alloc(g); + if (err != 0) { + nvgpu_err(g, "couldn't allocate gr memory"); + goto fail_sim; + } + return 0; fail_sim: @@ -675,6 +682,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, err_free_irq: nvgpu_free_irq(g); + nvgpu_gr_free(g); err_disable_msi: #if defined(CONFIG_PCI_MSI) if (g->msi_enabled) diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c index 6992fa5ff..58ee4eddf 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -158,13 +158,13 @@ static int gp10b_tegra_probe(struct device *dev) platform->disable_bigpage = !dev->archdata.iommu; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + platform->g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + platform->g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close = false; - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; + platform->g->gr->ctx_vars.force_preemption_gfxp = false; + platform->g->gr->ctx_vars.force_preemption_cilp = false; gp10b_tegra_get_clocks(dev); nvgpu_linux_init_clk_support(platform->g); diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index bdcb8a4bc..325a7677f 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c @@ -86,13 +86,13 @@ static int gv11b_tegra_probe(struct device *dev) platform->disable_bigpage = !dev->archdata.iommu; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + platform->g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close = false; - platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close + platform->g->gr->ctx_vars.dump_ctxsw_stats_on_channel_close = false; - platform->g->gr.ctx_vars.force_preemption_gfxp = false; - platform->g->gr.ctx_vars.force_preemption_cilp = false; + 
platform->g->gr->ctx_vars.force_preemption_gfxp = false; + platform->g->gr->ctx_vars.force_preemption_cilp = false; gp10b_tegra_get_clocks(dev); nvgpu_linux_init_clk_support(platform->g); diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 7cbfed498..50c42f801 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -825,7 +825,7 @@ static ssize_t tpc_pg_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; unsigned long val = 0; nvgpu_mutex_acquire(&g->tpc_pg_lock); @@ -871,7 +871,7 @@ static ssize_t tpc_fs_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gk20a *g = get_gk20a(dev); - struct nvgpu_gr_config *config = g->gr.config; + struct nvgpu_gr_config *config = g->gr->config; unsigned long val = 0; if (kstrtoul(buf, 10, &val) < 0) @@ -887,14 +887,14 @@ static ssize_t tpc_fs_mask_store(struct device *dev, g->ops.gr.set_gpc_tpc_mask(g, 0); - nvgpu_gr_obj_ctx_deinit(g, g->gr.golden_image); + nvgpu_gr_obj_ctx_deinit(g, g->gr->golden_image); - g->gr.ctx_vars.golden_image_initialized = false; - nvgpu_gr_obj_ctx_set_golden_image_size(g->gr.golden_image, 0); + g->gr->ctx_vars.golden_image_initialized = false; + nvgpu_gr_obj_ctx_set_golden_image_size(g->gr->golden_image, 0); - nvgpu_gr_config_deinit(g, g->gr.config); + nvgpu_gr_config_deinit(g, g->gr->config); /* Cause next poweron to reinit just gr */ - g->gr.sw_ready = false; + g->gr->sw_ready = false; } return count; @@ -904,7 +904,7 @@ static ssize_t tpc_fs_mask_read(struct device *dev, struct device_attribute *attr, char *buf) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 gpc_index; u32 tpc_fs_mask = 0; int err = 0; @@ -989,7 +989,7 @@ static ssize_t gfxp_wfi_timeout_count_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; unsigned long val = 0; int err = -1; @@ -1025,7 +1025,7 @@ static ssize_t gfxp_wfi_timeout_unit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; int err = -1; if (count > 0 && buf[0] == 's') @@ -1058,7 +1058,7 @@ static ssize_t gfxp_wfi_timeout_count_read(struct device *dev, struct device_attribute *attr, char *buf) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; u32 val = gr->gfxp_wfi_timeout_count; return snprintf(buf, PAGE_SIZE, "%d\n", val); @@ -1068,7 +1068,7 @@ static ssize_t gfxp_wfi_timeout_unit_read(struct device *dev, struct device_attribute *attr, char *buf) { struct gk20a *g = get_gk20a(dev); - struct gr_gk20a *gr = &g->gr; + struct nvgpu_gr *gr = g->gr; if (gr->gfxp_wfi_timeout_unit_usec) return snprintf(buf, PAGE_SIZE, "usec\n"); diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c index 11db7df76..9073a6582 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -140,11 +140,17 @@ static int vgpu_init_support(struct platform_device *pdev) g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K); if (!g->dbg_regops_tmp_buf) { nvgpu_err(g, "couldn't allocate regops tmp buf"); 
- return -ENOMEM; + err = -ENOMEM; } g->dbg_regops_tmp_buf_ops = SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); + err = nvgpu_gr_alloc(g); + if (err != 0) { + nvgpu_err(g, "couldn't allocate gr memory"); + goto fail; + } + g->remove_support = vgpu_remove_support; return 0; @@ -361,7 +367,11 @@ int vgpu_probe(struct platform_device *pdev) if (err) return err; - vgpu_init_support(pdev); + err = vgpu_init_support(pdev); + if (err != 0) { + kfree(l); + return -ENOMEM; + } vgpu_init_vars(gk20a, platform); @@ -374,6 +384,7 @@ int vgpu_probe(struct platform_device *pdev) /* Initialize the platform interface. */ err = platform->probe(dev); if (err) { + nvgpu_gr_free(gk20a); if (err == -EPROBE_DEFER) nvgpu_info(gk20a, "platform probe failed"); else @@ -385,6 +396,7 @@ int vgpu_probe(struct platform_device *pdev) err = platform->late_probe(dev); if (err) { nvgpu_err(gk20a, "late probe failed"); + nvgpu_gr_free(gk20a); return err; } } @@ -392,12 +404,14 @@ int vgpu_probe(struct platform_device *pdev) err = vgpu_comm_init(gk20a); if (err) { nvgpu_err(gk20a, "failed to init comm interface"); + nvgpu_gr_free(gk20a); return -ENOSYS; } priv->virt_handle = vgpu_connect(); if (!priv->virt_handle) { nvgpu_err(gk20a, "failed to connect to server node"); + nvgpu_gr_free(gk20a); vgpu_comm_deinit(); return -ENOSYS; } @@ -405,19 +419,23 @@ int vgpu_probe(struct platform_device *pdev) err = vgpu_get_constants(gk20a); if (err) { vgpu_comm_deinit(); + nvgpu_gr_free(gk20a); return err; } err = vgpu_pm_init(dev); if (err) { nvgpu_err(gk20a, "pm init failed"); + nvgpu_gr_free(gk20a); return err; } err = nvgpu_thread_create(&priv->intr_handler, gk20a, vgpu_intr_thread, "gk20a"); - if (err) + if (err) { + nvgpu_gr_free(gk20a); return err; + } gk20a_debug_init(gk20a, "gpu.0");
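
Note (not part of the hunks above): the new nvgpu_gr_alloc()/nvgpu_gr_free() helpers are only declared in this patch (include/nvgpu/gr/gr.h) and called from module.c, pci.c and vgpu_linux.c; their definitions fall outside the diff. The following is a minimal sketch of what such helpers would presumably look like, assuming they do nothing more than manage the lifetime of the nvgpu_gr object that g->gr now points to. The header names and the nvgpu_kzalloc()/nvgpu_kfree() pairing follow existing nvgpu conventions and are assumptions, not content of this patch:

    #include <nvgpu/gk20a.h>        /* struct gk20a, g->gr pointer */
    #include <nvgpu/kmem.h>         /* nvgpu_kzalloc(), nvgpu_kfree() */
    #include <nvgpu/gr/gr.h>        /* nvgpu_gr_alloc()/nvgpu_gr_free() declarations */
    #include "gk20a/gr_gk20a.h"     /* full struct nvgpu_gr definition (per the hunk above) */

    int nvgpu_gr_alloc(struct gk20a *g)
    {
            /* Sketch: one zero-initialized nvgpu_gr instance per GPU. */
            g->gr = nvgpu_kzalloc(g, sizeof(*g->gr));
            if (g->gr == NULL) {
                    return -ENOMEM;
            }

            /* Back-pointer used throughout the gr code (gr->g). */
            g->gr->g = g;

            return 0;
    }

    void nvgpu_gr_free(struct gk20a *g)
    {
            nvgpu_kfree(g, g->gr);
            g->gr = NULL;
    }

Moving gr from an embedded member of struct gk20a to a heap-allocated pointer is what motivates the allocation calls added to gk20a_init_support(), nvgpu_pci_init_support() and vgpu_init_support(), as well as the nvgpu_gr_free() calls added to nvgpu_free_gk20a() and the vgpu_probe() error paths: the object must exist before the first g->gr-> dereference and must be released on every teardown path.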