diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 3051135fd..8e46d70de 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -1005,7 +1005,6 @@ static void channel_free(struct nvgpu_channel *ch, bool force) nvgpu_cic_rm_wait_for_deferred_interrupts(g); unbind: - g->ops.channel.unbind(ch); g->ops.channel.free_inst(g, ch); nvgpu_channel_wdt_destroy(ch->wdt); @@ -1520,6 +1519,14 @@ int nvgpu_channel_setup_bind(struct nvgpu_channel *c, } #endif + c->replayable = false; + +#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { + c->replayable = true; + } +#endif + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { err = nvgpu_channel_setup_usermode(c, args); } else { diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 357292631..fc0be9f21 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_NVGPU_PROFILER #include #endif @@ -292,12 +293,7 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, } #endif - /** - * Remove channel from TSG and re-enable rest of the channels. - * Since channel removal can lead to subctx removal and/or - * VM mappings removal, acquire ctx_init_lock. - */ - nvgpu_mutex_acquire(&tsg->ctx_init_lock); + g->ops.channel.unbind(ch); nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_tsg_subctx_unbind_channel(tsg, ch); @@ -311,8 +307,6 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, g->ops.channel.disable(ch); nvgpu_rwsem_up_write(&tsg->ch_list_lock); - nvgpu_mutex_release(&tsg->ctx_init_lock); - /* * Don't re-enable all channels if TSG has timed out already * @@ -345,8 +339,16 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); + /** + * Remove channel from TSG and re-enable rest of the channels. + * Since channel removal can lead to subctx removal and/or + * VM mappings removal, acquire ctx_init_lock. 
+ */ + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + err = nvgpu_tsg_unbind_channel_common(tsg, ch); if (!force && err == -EAGAIN) { + nvgpu_mutex_release(&tsg->ctx_init_lock); return err; } @@ -368,6 +370,8 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, } } + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); return err; @@ -413,7 +417,7 @@ fail_common: } #endif - nvgpu_mutex_acquire(&tsg->ctx_init_lock); + g->ops.channel.unbind(ch); nvgpu_rwsem_down_write(&tsg->ch_list_lock); nvgpu_tsg_subctx_unbind_channel(tsg, ch); @@ -907,6 +911,15 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid) goto clean_up; } + if (g->ops.tsg.init_subctx_state != NULL) { + err = g->ops.tsg.init_subctx_state(g, tsg); + if (err != 0) { + nvgpu_err(g, "tsg %d subctx state init failed %d", + tsg->tsgid, err); + goto clean_up; + } + } + #ifdef CONFIG_NVGPU_SM_DIVERSITY nvgpu_gr_ctx_set_sm_diversity_config(tsg->gr_ctx, NVGPU_INVALID_SM_CONFIG_ID); @@ -970,6 +983,10 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg) nvgpu_free_gr_ctx_struct(g, tsg->gr_ctx); tsg->gr_ctx = NULL; + if (g->ops.tsg.deinit_subctx_state != NULL) { + g->ops.tsg.deinit_subctx_state(g, tsg); + } + if (g->ops.tsg.deinit_eng_method_buffers != NULL) { g->ops.tsg.deinit_eng_method_buffers(g, tsg); } diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c index 4864ccf72..ece7d6517 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c @@ -120,6 +120,10 @@ void nvgpu_tsg_subctx_unbind_channel(struct nvgpu_tsg *tsg, nvgpu_list_del(&ch->subctx_entry); if (nvgpu_list_empty(&subctx->ch_list)) { + if (g->ops.tsg.remove_subctx_channel_hw != NULL) { + g->ops.tsg.remove_subctx_channel_hw(ch); + } + if (g->ops.gr.setup.free_subctx != NULL) { g->ops.gr.setup.free_subctx(ch); subctx->gr_subctx = NULL; @@ -203,6 +207,22 @@ u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *subctx) return subctx->subctx_id; } +void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx, + bool replayable) +{ + subctx->replayable = replayable; +} + +bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx) +{ + return subctx->replayable; +} + +struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx) +{ + return subctx->vm; +} + struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings( struct gk20a *g, struct nvgpu_tsg *tsg, diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h index ed6376148..0939b7760 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h @@ -44,6 +44,9 @@ struct nvgpu_tsg_subctx { /** Subcontext's GR ctx header and GR ctx buffers mappings. */ struct nvgpu_gr_subctx *gr_subctx; + /** Replayable faults state for a subcontext. */ + bool replayable; + /** * Subcontext's entry in TSG's (#nvgpu_tsg) subcontexts list * #subctx_list. 
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c index 919f1b18f..f47cd1567 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -227,7 +227,11 @@ static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g, return err; } - g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0); + err = g->ops.mm.init_inst_block_core(&ucode_info->inst_blk_desc, vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, &ucode_info->inst_blk_desc); + return err; + } /* Map ucode surface to GMMU */ ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm, diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index c4f8c99cf..035400585 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -228,7 +228,12 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm) if (err != 0) { goto clean_up_vm; } - g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_vm; + } return 0; @@ -247,7 +252,12 @@ static int nvgpu_init_hwpm(struct mm_gk20a *mm) if (err != 0) { return err; } - g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->pmu.vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + return err; + } return 0; } @@ -366,7 +376,12 @@ static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) if (err != 0) { goto clean_up_vm; } - g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size); + + err = g->ops.mm.init_inst_block_core(inst_block, mm->bar1.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_vm; + } return 0; @@ -401,7 +416,11 @@ static int nvgpu_init_engine_ucode_vm(struct gk20a *g, goto clean_up_va; } - g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size); + err = g->ops.mm.init_inst_block_core(inst_block, ucode->vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_va; + } return 0; diff --git a/drivers/gpu/nvgpu/common/perf/perfbuf.c b/drivers/gpu/nvgpu/common/perf/perfbuf.c index c541a4e26..a5a1e30d5 100644 --- a/drivers/gpu/nvgpu/common/perf/perfbuf.c +++ b/drivers/gpu/nvgpu/common/perf/perfbuf.c @@ -72,7 +72,12 @@ int nvgpu_perfbuf_init_inst_block(struct gk20a *g) return err; } - g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); + err = g->ops.mm.init_inst_block_core(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0); + if (err != 0) { + nvgpu_free_inst_block(g, &mm->perfbuf.inst_block); + return err; + } + g->ops.perf.init_inst_block(g, &mm->perfbuf.inst_block); return 0; diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c index 26e2014d8..8747f98f0 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/channel_ga10b_fusa.c @@ -82,9 +82,19 @@ void ga10b_channel_bind(struct nvgpu_channel *ch) { struct gk20a *g = ch->g; struct nvgpu_runlist *runlist = NULL; + int err; runlist = ch->runlist; + /* Enable subcontext */ + if (g->ops.tsg.add_subctx_channel_hw != NULL) { + err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable); + if (err != 0) { + nvgpu_err(g, "Subcontext addition failed %d", err); + return; + } + } + /* Enable channel */ nvgpu_chram_bar0_writel(g, runlist, 
runlist_chram_channel_r(ch->chid), runlist_chram_channel_update_f( diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h index a563b2b27..eb0e64998 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,7 @@ struct nvgpu_channel; struct nvgpu_channel_hw_state; struct nvgpu_debug_context; +void gv11b_channel_bind(struct nvgpu_channel *ch); void gv11b_channel_unbind(struct nvgpu_channel *ch); u32 gv11b_channel_count(struct gk20a *g); void gv11b_channel_read_state(struct gk20a *g, struct nvgpu_channel *ch, diff --git a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c index 59edfbba0..7e52b834f 100644 --- a/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/channel_gv11b_fusa.c @@ -32,6 +32,42 @@ #include +void gv11b_channel_bind(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + int err; + + u32 inst_ptr = nvgpu_inst_block_ptr(g, &ch->inst_block); + + nvgpu_log_info(g, "bind channel %d inst ptr 0x%08x", + ch->chid, inst_ptr); + + /* Enable subcontext */ + if (g->ops.tsg.add_subctx_channel_hw != NULL) { + err = g->ops.tsg.add_subctx_channel_hw(ch, ch->replayable); + if (err != 0) { + nvgpu_err(g, "Subcontext addition failed %d", err); + return; + } + } + + /* Enable channel */ + nvgpu_writel(g, ccsr_channel_inst_r(ch->chid), + ccsr_channel_inst_ptr_f(inst_ptr) | + nvgpu_aperture_mask(g, &ch->inst_block, + ccsr_channel_inst_target_sys_mem_ncoh_f(), + ccsr_channel_inst_target_sys_mem_coh_f(), + ccsr_channel_inst_target_vid_mem_f()) | + ccsr_channel_inst_bind_true_f()); + + nvgpu_writel(g, ccsr_channel_r(ch->chid), + (nvgpu_readl(g, ccsr_channel_r(ch->chid)) & + ~ccsr_channel_enable_set_f(~U32(0U))) | + ccsr_channel_enable_set_true_f()); + + nvgpu_atomic_set(&ch->bound, 1); +} + void gv11b_channel_unbind(struct nvgpu_channel *ch) { struct gk20a *g = ch->g; diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c index 50840ddc6..f644bd885 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_ga10b_fusa.c @@ -44,7 +44,6 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, u32 eng_intr_mask = 0U; u32 eng_intr_vector = 0U; u32 eng_bitmask = 0U; - bool replayable = false; (void)flags; @@ -65,18 +64,9 @@ int ga10b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, nvgpu_memset(g, mem, 0U, 0U, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c index b2a2245ca..f482b3f98 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_gv11b_fusa.c @@ 
-38,7 +38,6 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, struct gk20a *g = ch->g; struct nvgpu_mem *mem = &ch->inst_block; u32 data; - bool replayable = false; (void)flags; @@ -46,18 +45,9 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c index cba9b0373..4a8c577bc 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramfc_tu104.c @@ -39,24 +39,14 @@ int tu104_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base, struct gk20a *g = ch->g; struct nvgpu_mem *mem = &ch->inst_block; u32 data; - bool replayable = false; nvgpu_log_fn(g, " "); nvgpu_memset(g, mem, 0, 0, ram_fc_size_val_v()); -#ifdef CONFIG_NVGPU_REPLAYABLE_FAULT - if ((flags & NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE) != 0U) { - replayable = true; - } -#endif - nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout, g->ops.pbdma.acquire_val(pbdma_acquire_timeout)); - g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, - replayable, nvgpu_channel_get_max_subctx_count(ch)); - nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(), g->ops.pbdma.get_gp_base(gpfifo_base)); diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h index c7321f1d0..e0c3e3702 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,9 +30,15 @@ struct nvgpu_mem; void gv11b_ramin_set_gr_ptr(struct gk20a *g, struct nvgpu_mem *inst_block, u64 gpu_va); +void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map); +void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g, + u32 *subctx_pdb_map); +void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask); void gv11b_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count); + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map); void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, struct nvgpu_mem *inst_block, u64 gpu_va); void gv11b_ramin_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, diff --git a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c index abb5cb83f..55551e01b 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/ramin_gv11b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -45,85 +45,89 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g, ram_in_engine_wfi_ptr_hi_f(addr_hi)); } -static void gv11b_subctx_commit_valid_mask(struct gk20a *g, - struct nvgpu_mem *inst_block, u32 max_subctx_count) +void gv11b_ramin_set_subctx_pdb_info(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map) { + u32 format_word = 0; + u32 pdb_addr_lo = 0; + u32 pdb_addr_hi = 0; + u64 pdb_addr; + u32 aperture; + + if (add) { + aperture = nvgpu_aperture_mask(g, pdb_mem, + ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), + ram_in_sc_page_dir_base_target_sys_mem_coh_v(), + ram_in_sc_page_dir_base_target_vid_mem_v()); + + pdb_addr = nvgpu_mem_get_addr(g, pdb_mem); + pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); + pdb_addr_hi = u64_hi32(pdb_addr); + format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) | + ram_in_sc_page_dir_base_vol_f( + ram_in_sc_page_dir_base_vol_true_v(), 0U) | + ram_in_sc_use_ver2_pt_format_f(1U, 0U) | + ram_in_sc_big_page_size_f(1U, 0U) | + ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); + + if (replayable) { + format_word |= + ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) | + ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U); + } + } else { + aperture = ram_in_sc_page_dir_base_target_invalid_v(); + format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U); + } + + nvgpu_log(g, gpu_dbg_info, "%s subctx[%u] pdb info lo %x hi %x", + add ? "add" : "remove", subctx_id, + format_word, pdb_addr_hi); + + subctx_pdb_map[subctx_id * 4U] = format_word; + subctx_pdb_map[(subctx_id * 4U) + 1U] = pdb_addr_hi; +} + +void gv11b_ramin_init_subctx_pdb_map(struct gk20a *g, + u32 *subctx_pdb_map) +{ + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + u32 i; + + /* Initially, all subcontexts are invalid in the TSG. */ + for (i = 0; i < max_subctx_count; i++) { + gv11b_ramin_set_subctx_pdb_info(g, i, NULL, false, false, + subctx_pdb_map); + } +} + +void gv11b_ramin_init_subctx_valid_mask(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask) +{ + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); u32 id; - u32 subctx_count = max_subctx_count; for (id = 0U; id < max_subctx_count; id += 32U) { - u32 subctx_mask_max_bit = ((subctx_count < 32U) ? 
- (subctx_count % 32U) : 0U); - u32 subctx_mask = U32_MAX; - - if (subctx_mask_max_bit != 0U) { - subctx_mask = nvgpu_safe_sub_u32( - BIT32(subctx_mask_max_bit), 1U); - } + u32 subctx_mask = ((u32 *)valid_subctx_mask)[id / 32U]; nvgpu_mem_wr32(g, inst_block, ram_in_sc_pdb_valid_long_w(id), subctx_mask); nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig, - "id[%d] max_subctx_count[%u] subctx_mask_max_bit[%u] " - "subctx_count[%u] subctx_mask[%x] ", - id, max_subctx_count, subctx_mask_max_bit, - subctx_count, subctx_mask); - - if (subctx_count > 32U) { - subctx_count = nvgpu_safe_sub_u32(subctx_count, 32U); - } - } -} - -static void gv11b_subctx_commit_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) -{ - u32 lo, hi; - u32 subctx_id = 0; - u32 format_word; - u32 pdb_addr_lo, pdb_addr_hi; - u64 pdb_addr; - u32 aperture = nvgpu_aperture_mask(g, pdb_mem, - ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), - ram_in_sc_page_dir_base_target_sys_mem_coh_v(), - ram_in_sc_page_dir_base_target_vid_mem_v()); - - pdb_addr = nvgpu_mem_get_addr(g, pdb_mem); - pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); - pdb_addr_hi = u64_hi32(pdb_addr); - format_word = ram_in_sc_page_dir_base_target_f(aperture, 0U) | - ram_in_sc_page_dir_base_vol_f( - ram_in_sc_page_dir_base_vol_true_v(), 0U) | - ram_in_sc_use_ver2_pt_format_f(1U, 0U) | - ram_in_sc_big_page_size_f(1U, 0U) | - ram_in_sc_page_dir_base_lo_0_f(pdb_addr_lo); - - if (replayable) { - format_word |= - ram_in_sc_page_dir_base_fault_replay_tex_f(1U, 0U) | - ram_in_sc_page_dir_base_fault_replay_gcc_f(1U, 0U); - } - - nvgpu_log(g, gpu_dbg_info, " pdb info lo %x hi %x", - format_word, pdb_addr_hi); - for (subctx_id = 0U; subctx_id < max_subctx_count; subctx_id++) { - lo = ram_in_sc_page_dir_base_vol_w(subctx_id); - hi = ram_in_sc_page_dir_base_hi_w(subctx_id); - nvgpu_mem_wr32(g, inst_block, lo, format_word); - nvgpu_mem_wr32(g, inst_block, hi, pdb_addr_hi); + "id[%d] max_subctx_count[%u] subctx_mask[%x] ", + id, max_subctx_count, subctx_mask); } } void gv11b_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map) { - gv11b_subctx_commit_pdb(g, inst_block, pdb_mem, replayable, - max_subctx_count); - gv11b_subctx_commit_valid_mask(g, inst_block, max_subctx_count); + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + u32 size = max_subctx_count * 4U * 4U; + nvgpu_mem_wr_n(g, inst_block, ram_in_sc_page_dir_base_vol_w(0) * 4U, + subctx_pdb_map, size); } void gv11b_ramin_set_eng_method_buffer(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h index f148cf23b..cb55192e0 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -38,4 +38,61 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g, void gv11b_tsg_bind_channel_eng_method_buffers(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); +/** + * @brief Initialize subcontext PDB map and valid mask for a TSG. + * + * @param g [in] Pointer to GPU driver struct. 
+ * @param tsg [in] Pointer to TSG struct. + * + * - If subcontexts are enabled: + * - Allocate array of PDB configuration values for maximum supported + * subcontexts. + * - Initialize the array by calling g->ops.ramin.init_subctx_pdb_map. + * - Allocate valid subcontexts bitmask. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg); + +/** + * @brief Deinitialize subcontext PDB map and valid mask for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * + * - If subcontexts are enabled: + * - Free array of PDB configuration values. + * - Free valid subcontexts bitmask. + */ +void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg); + +/** + * @brief Add a subctx channel to TSG. + * + * @param ch [in] Pointer to Channel struct. + * @param replayable [in] Replayable state of the channel. + * + * - If subcontexts are enabled: + * - Update subcontext info in TSG members if this is the first channel + * of a subcontext and update instance blocks of all channels + * in the TSG with this information. + * - If this is a channel in an existing subcontext: + * - Update the channel instance block with subcontext info. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable); + +/** + * @brief Remove a subctx channel from TSG. + * + * @param ch [in] Pointer to Channel struct. + * + * - If subcontexts are enabled: + * - Update subcontext info in TSG members as this is the last channel + * of a subcontext and update instance blocks of all channels + * in the TSG with this information. + */ +void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch); + #endif /* NVGPU_TSG_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c index 760cef6bb..cbd77728d 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b_fusa.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -174,3 +175,192 @@ void gv11b_tsg_deinit_eng_method_buffers(struct gk20a *g, nvgpu_log_info(g, "eng method buffers de-allocated"); } + +int gv11b_tsg_init_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + u32 max_subctx_count; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + + /* + * Allocate an array of subctx PDB configuration values for all supported + * subcontexts. For each subctx, there will be two registers to be + * configured, ram_in_sc_page_dir_base_lo_w(i) and + * ram_in_sc_page_dir_base_hi_w(i) in the instance block for the channels + * belonging to this TSG. Two more unused registers follow these for each + * subcontext. The same PDB table/array is programmed in the instance block + * of all the channels. + * + * As subcontexts are bound to the TSG, their configuration register + * values are added to the array and the corresponding bit is set in the + * valid_subctxs bitmask. And as subcontexts are unbound from + * the TSG, their configuration register values are invalidated in the + * array and the corresponding bit is cleared in the valid_subctxs bitmask.
+ */ + tsg->subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (tsg->subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return -ENOMEM; + } + + g->ops.ramin.init_subctx_pdb_map(g, tsg->subctx_pdb_map); + + tsg->valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (tsg->valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmap alloc failed"); + nvgpu_kfree(g, tsg->subctx_pdb_map); + tsg->subctx_pdb_map = NULL; + return -ENOMEM; + } + + return 0; +} + +void gv11b_tsg_deinit_subctx_state(struct gk20a *g, struct nvgpu_tsg *tsg) +{ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + nvgpu_kfree(g, tsg->subctx_pdb_map); + tsg->subctx_pdb_map = NULL; + + nvgpu_kfree(g, tsg->valid_subctxs); + tsg->valid_subctxs = NULL; +} + +static void gv11b_tsg_update_inst_blocks_subctxs(struct nvgpu_tsg *tsg) +{ + struct gk20a *g = tsg->g; + struct nvgpu_channel *ch; + + nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { + g->ops.ramin.init_subctx_pdb(g, &ch->inst_block, + tsg->subctx_pdb_map); + g->ops.ramin.init_subctx_mask(g, &ch->inst_block, + tsg->valid_subctxs); + } +} + +static void gv11b_tsg_update_subctxs(struct nvgpu_tsg *tsg, u32 subctx_id, + struct vm_gk20a *vm, bool replayable, bool add) +{ + struct gk20a *g = tsg->g; + + if (add) { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, vm->pdb.mem, + replayable, true, tsg->subctx_pdb_map); + nvgpu_set_bit(subctx_id, tsg->valid_subctxs); + } else { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, NULL, + false, false, tsg->subctx_pdb_map); + nvgpu_clear_bit(subctx_id, tsg->valid_subctxs); + } + + gv11b_tsg_update_inst_blocks_subctxs(tsg); +} + +static void gv11b_tsg_add_new_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct nvgpu_tsg_subctx *subctx = ch->subctx; + struct vm_gk20a *vm = nvgpu_tsg_subctx_get_vm(subctx); + u32 subctx_id = nvgpu_tsg_subctx_get_id(subctx); + + nvgpu_tsg_subctx_set_replayable(subctx, replayable); + + gv11b_tsg_update_subctxs(tsg, subctx_id, vm, replayable, true); +} + +static void gv11b_tsg_add_existing_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct nvgpu_tsg_subctx *subctx = ch->subctx; + struct gk20a *g = ch->g; + + if (nvgpu_tsg_subctx_get_replayable(subctx) != replayable) { + nvgpu_err(g, "subctx replayable mismatch. ignoring."); + } + + g->ops.ramin.init_subctx_pdb(g, &ch->inst_block, tsg->subctx_pdb_map); + g->ops.ramin.init_subctx_mask(g, &ch->inst_block, tsg->valid_subctxs); +} + +int gv11b_tsg_add_subctx_channel_hw(struct nvgpu_channel *ch, bool replayable) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct gk20a *g = tsg->g; + int err; + + nvgpu_log(g, gpu_dbg_fn, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + /* + * Add a new subcontext to the TSG. The sequence is as follows: + * 1. Disable TSG. + * 2. Preempt TSG. + * 3. Program subctx PDBs in instance blocks of all channels in + * the TSG. + * 4. Enable TSG. + * This sequence is executed while holding the TSG-level lock + * ctx_init_lock to synchronize with channels from other subcontexts. + * ctx_init_lock is reused here; it originally exists to synchronize + * the GR context initialization by the various channels in the TSG.
+ */ + + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + g->ops.tsg.disable(tsg); + err = g->ops.fifo.preempt_tsg(g, tsg); + if (err != 0) { + g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_err(g, "preempt failed %d", err); + return err; + } + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + if (!nvgpu_test_bit(ch->subctx_id, tsg->valid_subctxs)) { + gv11b_tsg_add_new_subctx_channel_hw(ch, replayable); + } else { + gv11b_tsg_add_existing_subctx_channel_hw(ch, replayable); + } + + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); + + nvgpu_log(g, gpu_dbg_fn, "done"); + + return 0; +} + +void gv11b_tsg_remove_subctx_channel_hw(struct nvgpu_channel *ch) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + struct gk20a *g = tsg->g; + u32 subctx_id; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + subctx_id = nvgpu_tsg_subctx_get_id(ch->subctx); + + gv11b_tsg_update_subctxs(tsg, subctx_id, NULL, false, false); +} diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c index 34bdb2071..01036cf87 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c @@ -1131,7 +1131,10 @@ static const struct gops_ramin ga100_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = ga10b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1199,6 +1202,10 @@ static const struct gops_channel ga100_ops_channel = { static const struct gops_tsg ga100_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index 2342f5edb..2fbd2c163 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -1145,7 +1145,10 @@ static const struct gops_ramin ga10b_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = ga10b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1214,6 +1217,10 @@ static const struct gops_channel ga10b_ops_channel = { static const struct gops_tsg ga10b_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = 
gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 35604bc97..f34b77cb9 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -803,7 +803,7 @@ static const struct gops_mm gm20b_ops_mm = { .setup_hw = nvgpu_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, .init_inst_block = gk20a_mm_init_inst_block, - .init_inst_block_core = gk20a_mm_init_inst_block, + .init_inst_block_core = gk20a_mm_init_inst_block_core, .get_default_va_sizes = gm20b_mm_get_default_va_sizes, #ifdef CONFIG_NVGPU_USERD .bar1_map_userd = gk20a_mm_bar1_map_userd, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index e691fad13..d575f30ca 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -969,7 +969,10 @@ static const struct gops_ramin gv11b_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1013,7 +1016,7 @@ static const struct gops_userd gv11b_ops_userd = { static const struct gops_channel gv11b_ops_channel = { .alloc_inst = nvgpu_channel_alloc_inst, .free_inst = nvgpu_channel_free_inst, - .bind = gm20b_channel_bind, + .bind = gv11b_channel_bind, .unbind = gv11b_channel_unbind, .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, @@ -1030,6 +1033,10 @@ static const struct gops_channel gv11b_ops_channel = { static const struct gops_tsg gv11b_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 74790477d..a22151849 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1026,7 +1026,10 @@ static const struct gops_ramin tu104_ops_ramin = { .set_gr_ptr = gv11b_ramin_set_gr_ptr, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -1068,7 +1071,7 @@ static const struct gops_userd tu104_ops_userd = { static const struct gops_channel tu104_ops_channel = { .alloc_inst = nvgpu_channel_alloc_inst, .free_inst = nvgpu_channel_free_inst, - .bind = gm20b_channel_bind, + .bind = gv11b_channel_bind, .unbind = gv11b_channel_unbind, .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, @@ -1085,6 +1088,10 @@ 
static const struct gops_channel tu104_ops_channel = { static const struct gops_tsg tu104_ops_tsg = { .enable = gv11b_tsg_enable, .disable = nvgpu_tsg_disable, + .init_subctx_state = gv11b_tsg_init_subctx_state, + .deinit_subctx_state = gv11b_tsg_deinit_subctx_state, + .add_subctx_channel_hw = gv11b_tsg_add_subctx_channel_hw, + .remove_subctx_channel_hw = gv11b_tsg_remove_subctx_channel_hw, .init_eng_method_buffers = gv11b_tsg_init_eng_method_buffers, .deinit_eng_method_buffers = gv11b_tsg_deinit_eng_method_buffers, .bind_channel = NULL, diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c index f404b2fee..fc7917cf5 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -44,6 +44,14 @@ void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, } } +int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block, + struct vm_gk20a *vm, u32 big_page_size) +{ + gk20a_mm_init_inst_block(inst_block, vm, big_page_size); + + return 0; +} + #ifdef CONFIG_NVGPU_USERD u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) { diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h index b74049969..8bed6444a 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h +++ b/drivers/gpu/nvgpu/hal/mm/mm_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,8 @@ struct vm_gk20a; void gk20a_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); +int gk20a_mm_init_inst_block_core(struct nvgpu_mem *inst_block, + struct vm_gk20a *vm, u32 big_page_size); u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset); #endif diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c index 53a396596..94a617abe 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gp10b_fusa.c @@ -51,7 +51,11 @@ int gp10b_mm_init_bar2_vm(struct gk20a *g) goto clean_up_va; } - g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size); + err = g->ops.mm.init_inst_block_core(inst_block, mm->bar2.vm, big_page_size); + if (err != 0) { + nvgpu_free_inst_block(g, inst_block); + goto clean_up_va; + } return 0; diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h b/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h index 754335887..f0e0f7157 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h +++ b/drivers/gpu/nvgpu/hal/mm/mm_gv11b.h @@ -30,7 +30,7 @@ struct vm_gk20a; void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); -void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, +int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); bool gv11b_mm_is_bar1_supported(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c index edf501959..2e8244c38 100644 --- a/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mm_gv11b_fusa.c @@ -21,6 +21,7 @@ */ #include +#include #include #include @@ -42,14 +43,48 @@ void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, } } -void gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, +int gv11b_mm_init_inst_block_core(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size) { struct gk20a *g = gk20a_from_vm(vm); + u32 max_subctx_count = g->ops.gr.init.get_max_subctx_count(); + unsigned long *valid_subctxs; + u32 *subctx_pdb_map; + + subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return -ENOMEM; + } + + valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmask alloc failed"); + nvgpu_kfree(g, subctx_pdb_map); + return -ENOMEM; + } gv11b_mm_init_inst_block(inst_block, vm, big_page_size); - g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false, 1U); + /* Program subctx pdb info in the instance block */ + g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map); + g->ops.ramin.set_subctx_pdb_info(g, CHANNEL_INFO_VEID0, vm->pdb.mem, + false, true, subctx_pdb_map); + g->ops.ramin.init_subctx_pdb(g, inst_block, subctx_pdb_map); + + /* + * Program subctx pdb valid mask in the instance block. + * Only subctx 0 is valid here. 
+ */ + nvgpu_set_bit(CHANNEL_INFO_VEID0, valid_subctxs); + g->ops.ramin.init_subctx_mask(g, inst_block, valid_subctxs); + + nvgpu_kfree(g, valid_subctxs); + nvgpu_kfree(g, subctx_pdb_map); + + return 0; } bool gv11b_mm_is_bar1_supported(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c index dc3244ca5..38c608e5d 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_ga10b.c @@ -693,7 +693,10 @@ static const struct gops_ramin vgpu_ga10b_ops_ramin = { .set_gr_ptr = NULL, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -744,6 +747,10 @@ static const struct gops_channel vgpu_ga10b_ops_channel = { static const struct gops_tsg vgpu_ga10b_ops_tsg = { .open = vgpu_tsg_open, .release = vgpu_tsg_release, + .init_subctx_state = NULL, + .deinit_subctx_state = NULL, + .add_subctx_channel_hw = NULL, + .remove_subctx_channel_hw = NULL, .init_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL, .enable = gv11b_tsg_enable, diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c index 770f5e7e6..c68c788b9 100644 --- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c @@ -660,7 +660,10 @@ static const struct gops_ramin vgpu_gv11b_ops_ramin = { .set_gr_ptr = NULL, .set_big_page_size = gm20b_ramin_set_big_page_size, .init_pdb = gv11b_ramin_init_pdb, + .init_subctx_pdb_map = gv11b_ramin_init_subctx_pdb_map, + .set_subctx_pdb_info = gv11b_ramin_set_subctx_pdb_info, .init_subctx_pdb = gv11b_ramin_init_subctx_pdb, + .init_subctx_mask = gv11b_ramin_init_subctx_valid_mask, .set_adr_limit = NULL, .base_shift = gk20a_ramin_base_shift, .alloc_size = gk20a_ramin_alloc_size, @@ -712,6 +715,10 @@ static const struct gops_channel vgpu_gv11b_ops_channel = { static const struct gops_tsg vgpu_gv11b_ops_tsg = { .open = vgpu_tsg_open, .release = vgpu_tsg_release, + .init_subctx_state = NULL, + .deinit_subctx_state = NULL, + .add_subctx_channel_hw = NULL, + .remove_subctx_channel_hw = NULL, .init_eng_method_buffers = NULL, .deinit_eng_method_buffers = NULL, .enable = gv11b_tsg_enable, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 573a2a5a8..1ce5484fb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -501,6 +501,11 @@ struct nvgpu_channel { /** Runlist the channel will run on. */ struct nvgpu_runlist *runlist; + /** + * Replayable fault state for the channel. + */ + bool replayable; + /** * Recovery path can be entered twice for the same error in * case of mmu_nack. This flag indicates if we already recovered @@ -960,7 +965,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g, pid_t pid, pid_t tid); /** - * @brief Setup and bind the channel + * @brief Setup and bind the channel and add subcontext PDB. * * @param ch [in] Channel pointer. * @param args [in] Setup bind arguments. @@ -975,6 +980,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g, * provided in args. 
A submit token is passed back to be written in the * doorbell register in the usermode region to notify the GPU for new * work on this channel. + * Update the instance blocks of all channels to add the subctx pdb. * * @note An address space needs to have been bound to the channel before * calling this function. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h b/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h index 60fb44d96..a13c67bdb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/mm.h @@ -566,7 +566,7 @@ struct gops_mm { * - Configures the pdb base, big page size and * 0th sub context's pdb base in context's instance block memory. */ - void (*init_inst_block_core)(struct nvgpu_mem *inst_block, + int (*init_inst_block_core)(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); /** diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h b/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h index 2fd5d017a..1e3c54678 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/ramin.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -90,34 +90,78 @@ struct gops_ramin { void (*init_pdb)(struct gk20a *g, struct nvgpu_mem *inst_block, u64 pdb_addr, struct nvgpu_mem *pdb_mem); + /** + * @brief Init subcontext pdb map for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. + * + * This HAL initializes the PDB map entries for all subcontexts of + * a TSG. It marks all subcontext PDBs as invalid. + */ + void (*init_subctx_pdb_map)(struct gk20a *g, + u32 *subctx_pdb_map); + + /** + * @brief Update subcontext pdb map for subcontext addition/removal. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx_id [in] Subcontext ID. + * @param pdb_mem [in] Memory descriptor of PDB. + * @param replayable [in] Indicates if errors are replayable + * for this subcontext. + * @param add [in] Indicates whether the subcontext PDB is + * to be added or removed. + * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. + * + * This HAL updates the PDB map entry for a sub-context: + * If adding a subcontext PDB: + * - Get aperture mask from \a pdb_mem. + * - Get physical address of \a pdb_mem. + * - Build PDB entry with defaults for PT version, big page size, + * volatile attribute, and above aperture. + * - If \a replayable is true, set replayable attribute for TEX + * and GCC faults. + * - Set lo and hi 32-bits to point to \a pdb_mem. + * - Program related entry in \a subctx_pdb_map. + * If removing a subcontext PDB: + * - Set aperture as ram_in_sc_page_dir_base_target_invalid_v(). + * - Program related entry in \a subctx_pdb_map. + */ + void (*set_subctx_pdb_info)(struct gk20a *g, + u32 subctx_id, struct nvgpu_mem *pdb_mem, + bool replayable, bool add, u32 *subctx_pdb_map); + /** + * @brief Init PDB for sub-contexts. + * + * @param g [in] Pointer to GPU driver struct. + * @param inst_block [in] Memory descriptor of Instance Block. - * @param pdb_mem [in] Memory descriptor of PDB. - * @param replayable [in] Indicates if errors are replayable - * for this Instance Block. - * @param max_subctx_count [in] Max number of sub context. 
+ * @param subctx_pdb_map [in] Pointer to the subcontext pdb map of a TSG. * - * This HAL configures PDB for all sub-contexts of Instance Block: - * - Get max number of sub-contexts from HW. - * - Get aperture mask from \a pdb_mem. - * - Get physical address of \a pdb_mem. - * - For each sub-context: - * - Build PDB entry with defaults for PT version, big page size, - * volatile attribute, and above aperture. - * - If \a replayable is true, set replayable attribute for TEX - * and GCC faults. - * - Set lo and hi 32-bits to point to \a pdb_mem. - * - Program related entry in Instance Block. - * - * @see NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE + * This HAL configures PDBs for all sub-contexts of the Instance Block. + * It copies \a subctx_pdb_map to the offset + * ram_in_sc_page_dir_base_vol_w(0) * 4U in + * the instance block. */ void (*init_subctx_pdb)(struct gk20a *g, - struct nvgpu_mem *inst_block, - struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count); + struct nvgpu_mem *inst_block, u32 *subctx_pdb_map); + + /** + * @brief Set the valid subcontexts mask. + * + * @param g [in] Pointer to GPU driver struct. + * @param inst_block [in] Memory descriptor of Instance + * Block. + * @param valid_subctx_mask [in] Bitmask of valid subcontexts. + * + * This HAL configures the valid mask for all sub-contexts of the + * Instance Block: + * - Get max number of sub-contexts from HW. + * - For each set of 32 subcontexts, set the mask from + * \a valid_subctx_mask in ram_in_sc_pdb_valid_long_w(). + */ + void (*init_subctx_mask)(struct gk20a *g, + struct nvgpu_mem *inst_block, unsigned long *valid_subctx_mask); /** * @brief Instance Block shift. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h index aff7a4eb2..b99babb35 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -55,6 +55,11 @@ struct gops_tsg { /** @cond DOXYGEN_SHOULD_SKIP_THIS */ int (*open)(struct nvgpu_tsg *tsg); void (*release)(struct nvgpu_tsg *tsg); + int (*init_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg); + void (*deinit_subctx_state)(struct gk20a *g, struct nvgpu_tsg *tsg); + int (*add_subctx_channel_hw)(struct nvgpu_channel *ch, + bool replayable); + void (*remove_subctx_channel_hw)(struct nvgpu_channel *ch); int (*init_eng_method_buffers)(struct gk20a *g, struct nvgpu_tsg *tsg); void (*deinit_eng_method_buffers)(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 9b6280df1..cd92c8f27 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -117,6 +117,10 @@ struct nvgpu_tsg { */ struct nvgpu_ref refcount; + /** Subcontext PDB map programmed in the instance blocks of all channels in this TSG. */ + u32 *subctx_pdb_map; + + /** Bitmask of valid subcontexts bound to this TSG. */ + unsigned long *valid_subctxs; + /** * List of subcontexts (#nvgpu_tsg_subctx) bound to this TSG. * Accessed by holding #ch_list_lock from TSG. 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h index 73cfd444e..4ce1f7826 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h @@ -60,6 +60,8 @@ int nvgpu_tsg_subctx_bind_channel(struct nvgpu_tsg *tsg, * - Validate that #subctx is allocated for the channel #ch. * - Remove the channel from the subctx #ch_list. * - If the subctx #ch_list is empty + * - Update the instance blocks of all channels to remove the + * subctx pdb. * - Invoke g->ops.gr.setup.free_subctx to free the GR subcontext * struct (and GR subcontext mappings struct). * - Remove the subctx from the TSG #subctx_list. @@ -120,6 +122,35 @@ struct nvgpu_gr_subctx *nvgpu_tsg_subctx_get_gr_subctx( */ u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *tsg_subctx); +/** + * @brief Set replayable state for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * @param replayable [in] Replayable state for the subcontext. + * + * - Set #replayable in #nvgpu_tsg_subctx. + */ +void nvgpu_tsg_subctx_set_replayable(struct nvgpu_tsg_subctx *subctx, + bool replayable); + +/** + * @brief Get replayable state for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * + * - Return #replayable from #nvgpu_tsg_subctx. + */ +bool nvgpu_tsg_subctx_get_replayable(struct nvgpu_tsg_subctx *subctx); + +/** + * @brief Get VM for a TSG subcontext. + * + * @param subctx [in] Pointer to TSG subcontext. + * + * - Return #vm from #nvgpu_tsg_subctx. + */ +struct vm_gk20a *nvgpu_tsg_subctx_get_vm(struct nvgpu_tsg_subctx *subctx); + /** * @brief Allocate or get the mappings struct for the TSG subcontext. * diff --git a/userspace/units/acr/nvgpu-acr.c b/userspace/units/acr/nvgpu-acr.c index 27ea3f0fa..8532f9748 100644 --- a/userspace/units/acr/nvgpu-acr.c +++ b/userspace/units/acr/nvgpu-acr.c @@ -826,24 +826,24 @@ int test_acr_prepare_ucode_blob(struct unit_module *m, nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); - nvgpu_posix_enable_fault_injection(kmem_fi, true, 17); + nvgpu_posix_enable_fault_injection(kmem_fi, true, 19); - unit_info(m, " kmem counter 17\n"); + unit_info(m, " kmem counter 19\n"); err = g->acr->prepare_ucode_blob(g); if (err != -ENOENT) { - unit_return_fail(m, "kmem count 17 test did not fail as expected\n"); + unit_return_fail(m, "kmem count 19 test did not fail as expected\n"); } /* - * the kmem counter is decreased after 17th count + * the kmem counter is decreased after the 19th count * because in the first attempt new memory is allocated and mapped for * page directories but after that since memory is already allocated it * is just mapped. Thus, number of kmallocs decrease. */ nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); - for (i = 9; i < 17; i++) { + for (i = 9; i < 19; i++) { unit_info(m, "kmem counter %d\n", i); nvgpu_posix_enable_fault_injection(kmem_fi, true, i); err = g->acr->prepare_ucode_blob(g); diff --git a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c index 8b4631f43..096fb5dee 100644 --- a/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c +++ b/userspace/units/fifo/channel/gv11b/nvgpu-channel-gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -69,6 +69,12 @@ struct unit_ctx { size_t size; }; +static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + return 0; +} + int test_gv11b_channel_unbind(struct unit_module *m, struct gk20a *g, void *args) { @@ -77,6 +83,8 @@ int test_gv11b_channel_unbind(struct unit_module *m, struct nvgpu_channel *ch; int ret = UNIT_FAIL; + g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw; + ch = nvgpu_channel_open_new(g, runlist_id, privileged, getpid(), getpid()); unit_assert(ch, goto done); diff --git a/userspace/units/fifo/channel/nvgpu-channel.c b/userspace/units/fifo/channel/nvgpu-channel.c index 3d390f88a..85729ff42 100644 --- a/userspace/units/fifo/channel/nvgpu-channel.c +++ b/userspace/units/fifo/channel/nvgpu-channel.c @@ -1562,6 +1562,12 @@ static void stub_channel_work_completion_cancel_sync(struct nvgpu_channel *ch) } #endif +static int stub_add_subctx_channel_hw(struct nvgpu_channel *ch, + bool replayable) +{ + return 0; +} + int test_channel_suspend_resume_serviceable_chs(struct unit_module *m, struct gk20a *g, void *vargs) { @@ -1594,6 +1600,7 @@ int test_channel_suspend_resume_serviceable_chs(struct unit_module *m, g->ops.fifo.preempt_tsg = stub_fifo_preempt_tsg; g->ops.fifo.preempt_channel = stub_fifo_preempt_channel; g->ops.runlist.reload = stub_runlist_reload; + g->ops.tsg.add_subctx_channel_hw = stub_add_subctx_channel_hw; orig_ch_tsgid = ch->tsgid; for (branches = 0U; branches < F_CHANNEL_SUSPEND_RESUME_CHS_LAST; diff --git a/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c b/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c index bae3ab33d..3168f6d1c 100644 --- a/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c +++ b/userspace/units/fifo/ramfc/gv11b/nvgpu-ramfc-gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -131,13 +131,6 @@ static int stub_ramfc_commit_userd(struct nvgpu_channel *ch) return 0; } -static void stub_ramin_init_subctx_pdb(struct gk20a *g, - struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem, - bool replayable, u32 max_subctx_count) -{ - global_count++; -} - #define F_RAMFC_SETUP_PRIVILEDGED_CH BIT(0) #define F_RAMFC_SETUP_LAST BIT(1) @@ -156,7 +149,6 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args) g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; g->ops.pbdma.acquire_val = stub_pbdma_acquire_val; - g->ops.ramin.init_subctx_pdb = stub_ramin_init_subctx_pdb; g->ops.pbdma.get_gp_base = stub_pbdma_get_gp_base; g->ops.pbdma.get_gp_base_hi = stub_pbdma_get_gp_base_hi; g->ops.pbdma.get_signature = stub_pbdma_get_signature; @@ -199,9 +191,9 @@ int test_gv11b_ramfc_setup(struct unit_module *m, struct gk20a *g, void *args) ram_fc_config_w()) == 5U, goto done); if (branches & F_RAMFC_SETUP_PRIVILEDGED_CH) { - unit_assert(global_count == 15U, goto done); + unit_assert(global_count == 14U, goto done); } else { - unit_assert(global_count == 13U, goto done); + unit_assert(global_count == 12U, goto done); } } diff --git a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c index a22a0b183..d464d1fd1 100644 --- a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c +++ b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -109,6 +109,24 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g, u64 pdb_addr; u32 max_subctx_count = ram_in_sc_page_dir_base_target__size_1_v(); u32 aperture = ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(); + unsigned long *valid_subctxs; + u32 *subctx_pdb_map; + + subctx_pdb_map = nvgpu_kzalloc(g, max_subctx_count * sizeof(u32) * 4U); + if (subctx_pdb_map == NULL) { + nvgpu_err(g, "subctx_pdb_map alloc failed"); + return UNIT_FAIL; + } + + valid_subctxs = nvgpu_kzalloc(g, + BITS_TO_LONGS(max_subctx_count) * + sizeof(unsigned long)); + if (valid_subctxs == NULL) { + nvgpu_err(g, "valid_subctxs bitmap alloc failed"); + nvgpu_kfree(g, subctx_pdb_map); + subctx_pdb_map = NULL; + return UNIT_FAIL; + } g->ops.ramin.alloc_size = gk20a_ramin_alloc_size; @@ -146,8 +164,21 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g, 1U, 0U); } - gv11b_ramin_init_subctx_pdb(g, &inst_block, &pdb_mem, - replayable, 64); + g->ops.ramin.init_subctx_pdb_map(g, subctx_pdb_map); + for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { + g->ops.ramin.set_subctx_pdb_info(g, subctx_id, + &pdb_mem, replayable, true, subctx_pdb_map); + nvgpu_set_bit(subctx_id, valid_subctxs); + } + + /* Program subctx pdb info in the instance block */ + g->ops.ramin.init_subctx_pdb(g, &inst_block, subctx_pdb_map); + + /* + * Program subctx pdb valid mask in the instance block. + * All subcontexts are set valid here. 
+ */ + g->ops.ramin.init_subctx_mask(g, &inst_block, valid_subctxs); for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) { addr_lo = ram_in_sc_page_dir_base_vol_w(subctx_id); diff --git a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h index 65e96e0c6..def991470 100644 --- a/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h +++ b/userspace/units/fifo/ramin/gv11b/ramin-gv11b-fusa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -63,7 +63,7 @@ int test_gv11b_ramin_set_gr_ptr(struct unit_module *m, struct gk20a *g, * Test Type: Feature * * Targets: gops_ramin.init_subctx_pdb, gv11b_ramin_init_subctx_pdb, - * gv11b_subctx_commit_pdb, gv11b_subctx_commit_valid_mask + * gops_ramin.init_subctx_mask, gv11b_ramin_init_subctx_valid_mask * * Input: None *
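
A minimal standalone sketch of the per-TSG subctx_pdb_map that this patch introduces, for illustration only: MAX_SUBCTX, WORDS_PER_SUBCTX, and the field-encoding constants below are placeholders (the real values come from g->ops.gr.init.get_max_subctx_count() and the ram_in_sc_* hardware headers); only the array layout (four u32 words per subcontext: format word, PDB address hi, two unused) and the add/remove flow mirror gv11b_ramin_set_subctx_pdb_info().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_SUBCTX       64U  /* placeholder for get_max_subctx_count() */
#define WORDS_PER_SUBCTX 4U   /* format word, PDB addr hi, two unused words */

/* Placeholder encodings standing in for the ram_in_sc_* field macros. */
#define TARGET_INVALID   0x0U
#define TARGET_VID_MEM   0x2U
#define FMT_COMMON_BITS  0x30U  /* ver2 PT format + big page size */
#define FMT_REPLAY_BITS  0xC0U  /* TEX + GCC fault replay */

/* Mirrors gv11b_ramin_set_subctx_pdb_info(): write one 4-word map entry. */
static void set_subctx_pdb_info(uint32_t subctx_id, uint64_t pdb_addr,
				bool replayable, bool add, uint32_t *map)
{
	uint32_t format_word = TARGET_INVALID;
	uint32_t pdb_addr_hi = 0U;

	if (add) {
		/* Low PDB address bits share the format word in real HW;
		 * this mask is a placeholder for the real shift/field macros. */
		format_word = TARGET_VID_MEM | FMT_COMMON_BITS |
			      (uint32_t)(pdb_addr & 0xFFFFF000ULL);
		if (replayable)
			format_word |= FMT_REPLAY_BITS;
		pdb_addr_hi = (uint32_t)(pdb_addr >> 32);
	}

	map[(subctx_id * WORDS_PER_SUBCTX) + 0U] = format_word;
	map[(subctx_id * WORDS_PER_SUBCTX) + 1U] = pdb_addr_hi;
}

int main(void)
{
	uint32_t *map = calloc(MAX_SUBCTX * WORDS_PER_SUBCTX, sizeof(*map));
	uint32_t i;

	if (map == NULL)
		return 1;

	/* init_subctx_pdb_map(): every subcontext starts out invalid. */
	for (i = 0U; i < MAX_SUBCTX; i++)
		set_subctx_pdb_info(i, 0U, false, false, map);

	/* Bind VEID 3 with a replayable PDB, then unbind it again. */
	set_subctx_pdb_info(3U, 0x123456789000ULL, true, true, map);
	printf("veid 3 format 0x%08x hi 0x%08x\n",
	       (unsigned)map[3U * WORDS_PER_SUBCTX],
	       (unsigned)map[(3U * WORDS_PER_SUBCTX) + 1U]);
	set_subctx_pdb_info(3U, 0U, false, false, map);
	printf("veid 3 format 0x%08x hi 0x%08x\n",
	       (unsigned)map[3U * WORDS_PER_SUBCTX],
	       (unsigned)map[(3U * WORDS_PER_SUBCTX) + 1U]);

	free(map);
	return 0;
}

After a bind or unbind updates the map, gv11b_tsg_update_inst_blocks_subctxs() copies it into the instance block of every channel in the TSG via init_subctx_pdb() and rewrites the valid mask via init_subctx_mask(), with the TSG disabled and preempted under ctx_init_lock.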