diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 240d2ede8..1cf9395e7 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -446,6 +446,12 @@ fifo: include/nvgpu/gops/tsg.h, include/nvgpu/tsg.h ] deps: [ ] + tsg_subctx: + safe: yes + sources: [ common/fifo/tsg_subctx.c, + common/fifo/tsg_subctx_priv.h, + include/nvgpu/tsg_subctx.h ] + deps: [ ] submit: safe: yes sources: [ common/fifo/submit.c, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 718f910f2..60b1f02cb 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -647,6 +647,7 @@ nvgpu-y += \ common/fifo/job.o \ common/fifo/priv_cmdbuf.o \ common/fifo/tsg.o \ + common/fifo/tsg_subctx.o \ common/fifo/runlist.o \ common/fifo/engine_status.o \ common/fifo/engines.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index eaf74ace9..37e2a3040 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -155,6 +155,7 @@ srcs += common/device.c \ common/fifo/fifo.c \ common/fifo/pbdma.c \ common/fifo/tsg.c \ + common/fifo/tsg_subctx.c \ common/fifo/runlist.c \ common/fifo/engine_status.c \ common/fifo/engines.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 113e924ee..3051135fd 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -983,11 +983,6 @@ static void channel_free(struct nvgpu_channel *ch, bool force) g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); #endif - if (g->ops.gr.setup.free_subctx != NULL) { - g->ops.gr.setup.free_subctx(ch); - ch->subctx = NULL; - } - g->ops.gr.intr.flush_channel_tlb(g); if (ch->usermode_submit_enabled) { @@ -1803,6 +1798,7 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid) nvgpu_mutex_init(&c->dbg_s_lock); #endif nvgpu_init_list_node(&c->ch_entry); + nvgpu_init_list_node(&c->subctx_entry); nvgpu_list_add(&c->free_chs, &g->fifo.free_chs); return 0; diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 40e30191e..357292631 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -142,6 +143,13 @@ int nvgpu_tsg_bind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) } nvgpu_rwsem_down_write(&tsg->ch_list_lock); + err = nvgpu_tsg_subctx_bind_channel(tsg, ch); + if (err != 0) { + nvgpu_err(g, "Subcontext %u bind failed", ch->subctx_id); + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + return err; + } + nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list); tsg->ch_count = nvgpu_safe_add_u32(tsg->ch_count, 1U); ch->tsgid = tsg->tsgid; @@ -284,8 +292,15 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, } #endif - /* Remove channel from TSG and re-enable rest of the channels */ + /** + * Remove channel from TSG and re-enable rest of the channels. + * Since channel removal can lead to subctx removal and/or + * VM mappings removal, acquire ctx_init_lock. 
+ */ + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_tsg_subctx_unbind_channel(tsg, ch); nvgpu_list_del(&ch->ch_entry); tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U); ch->tsgid = NVGPU_INVALID_TSG_ID; @@ -296,6 +311,8 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, g->ops.channel.disable(ch); nvgpu_rwsem_up_write(&tsg->ch_list_lock); + nvgpu_mutex_release(&tsg->ctx_init_lock); + /* * Don't re-enable all channels if TSG has timed out already * @@ -396,12 +413,17 @@ fail_common: } #endif + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_tsg_subctx_unbind_channel(tsg, ch); nvgpu_list_del(&ch->ch_entry); ch->tsgid = NVGPU_INVALID_TSG_ID; tsg->ch_count = nvgpu_safe_sub_u32(tsg->ch_count, 1U); nvgpu_rwsem_up_write(&tsg->ch_list_lock); + nvgpu_mutex_release(&tsg->ctx_init_lock); + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); return err; @@ -512,6 +534,8 @@ static void nvgpu_tsg_init_support(struct gk20a *g, u32 tsgid) tsg->abortable = true; nvgpu_init_list_node(&tsg->ch_list); + nvgpu_init_list_node(&tsg->subctx_list); + nvgpu_init_list_node(&tsg->gr_ctx_mappings_list); nvgpu_rwsem_init(&tsg->ch_list_lock); nvgpu_mutex_init(&tsg->ctx_init_lock); @@ -869,7 +893,6 @@ int nvgpu_tsg_open_common(struct gk20a *g, struct nvgpu_tsg *tsg, pid_t pid) tsg->ch_count = 0U; nvgpu_ref_init(&tsg->refcount); - tsg->vm = NULL; tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; tsg->timeslice_us = g->ops.tsg.default_timeslice_us(g); tsg->runlist = NULL; @@ -963,11 +986,6 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg) tsg->rl_domain = NULL; } - if (tsg->vm != NULL) { - nvgpu_vm_put(tsg->vm); - tsg->vm = NULL; - } - if(tsg->sm_error_states != NULL) { nvgpu_kfree(g, tsg->sm_error_states); tsg->sm_error_states = NULL; diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c new file mode 100644 index 000000000..4864ccf72 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tsg_subctx_priv.h" + +static inline struct nvgpu_tsg_subctx * +nvgpu_tsg_subctx_from_tsg_entry(struct nvgpu_list_node *node) +{ + return (struct nvgpu_tsg_subctx *) + ((uintptr_t)node - offsetof(struct nvgpu_tsg_subctx, tsg_entry)); +}; + +static struct nvgpu_tsg_subctx *nvgpu_tsg_subctx_from_id(struct nvgpu_tsg *tsg, + u32 subctx_id) +{ + struct nvgpu_tsg_subctx *subctx = NULL; + + nvgpu_list_for_each_entry(subctx, &tsg->subctx_list, + nvgpu_tsg_subctx, tsg_entry) { + if (subctx->subctx_id == subctx_id) { + return subctx; + } + } + + return NULL; +} + +int nvgpu_tsg_subctx_bind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + struct nvgpu_tsg_subctx *subctx = NULL; + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + subctx = nvgpu_tsg_subctx_from_id(tsg, ch->subctx_id); + if (subctx != NULL) { + if (subctx->vm != ch->vm) { + nvgpu_err(g, "subctx vm mismatch"); + return -EINVAL; + } + + goto add_ch_subctx; + } + + nvgpu_log(g, gpu_dbg_gr, "Allocating subctx %u", ch->subctx_id); + + subctx = nvgpu_kzalloc(g, sizeof(struct nvgpu_tsg_subctx)); + if (subctx == NULL) { + nvgpu_err(g, "Failed to allocate subctx"); + return -ENOMEM; + } + + subctx->subctx_id = ch->subctx_id; + subctx->tsg = tsg; + subctx->vm = ch->vm; + nvgpu_init_list_node(&subctx->ch_list); + nvgpu_init_list_node(&subctx->tsg_entry); + + nvgpu_list_add_tail(&subctx->tsg_entry, &tsg->subctx_list); + +add_ch_subctx: + ch->subctx = subctx; + nvgpu_list_add_tail(&ch->subctx_entry, &subctx->ch_list); + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; +} + +void nvgpu_tsg_subctx_unbind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + struct nvgpu_tsg_subctx *subctx; + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; + } + + subctx = ch->subctx; + nvgpu_assert(subctx != NULL); + + nvgpu_list_del(&ch->subctx_entry); + + if (nvgpu_list_empty(&subctx->ch_list)) { + if (g->ops.gr.setup.free_subctx != NULL) { + g->ops.gr.setup.free_subctx(ch); + subctx->gr_subctx = NULL; + } + + nvgpu_list_del(&subctx->tsg_entry); + nvgpu_kfree(tsg->g, subctx); + } + + ch->subctx = NULL; + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +int nvgpu_tsg_subctx_alloc_gr_subctx(struct gk20a *g, struct nvgpu_channel *ch) +{ + struct nvgpu_tsg_subctx *subctx; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + subctx = ch->subctx; + if (subctx == NULL) { + nvgpu_err(g, "channel not bound to TSG subctx"); + return -EINVAL; + } + + if (subctx->gr_subctx == NULL) { + subctx->gr_subctx = nvgpu_gr_subctx_alloc(g); + if (subctx->gr_subctx == NULL) { + nvgpu_err(g, "gr_subctx alloc failed"); + return -ENOMEM; + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; +} + +int nvgpu_tsg_subctx_setup_subctx_header(struct gk20a *g, + struct nvgpu_channel *ch) +{ + struct nvgpu_tsg_subctx *subctx; + int err; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return 0; + } + + subctx = ch->subctx; + if ((subctx == NULL) || (subctx->gr_subctx == NULL)) { + nvgpu_err(g, "channel not bound to TSG/GR subctx"); + return -EINVAL; + } + + err = nvgpu_gr_subctx_setup_header(g, subctx->gr_subctx, subctx->vm); + if (err != 0) { + nvgpu_err(g, 
"gr_subctx header setup failed %d", err); + return err; + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; +} + +struct nvgpu_gr_subctx *nvgpu_tsg_subctx_get_gr_subctx( + struct nvgpu_tsg_subctx *subctx) +{ + return subctx->gr_subctx; +} + +u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *subctx) +{ + return subctx->subctx_id; +} + +struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_gr_subctx *gr_subctx = NULL; + struct vm_gk20a *vm = ch->vm; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_assert(ch->subctx != NULL); + nvgpu_assert(ch->subctx->vm == vm); + + mappings = nvgpu_gr_ctx_mappings_get_subctx_mappings(g, tsg, vm); + if (mappings != NULL) { + goto add_gr_subctx; + } + + mappings = nvgpu_gr_ctx_mappings_create_subctx_mappings(g, tsg, vm); + if (mappings == NULL) { + nvgpu_err(g, "failed to allocate gr_ctx mappings"); + return NULL; + } + +add_gr_subctx: + gr_subctx = nvgpu_tsg_subctx_get_gr_subctx(ch->subctx); + nvgpu_assert(gr_subctx != NULL); + + nvgpu_gr_ctx_mappings_add_gr_subctx(mappings, gr_subctx); + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +#ifdef CONFIG_NVGPU_GFXP +static struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_get_veid0_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_tsg_subctx *subctx = NULL; + + subctx = nvgpu_tsg_subctx_from_id(tsg, CHANNEL_INFO_VEID0); + if (subctx == NULL) { + nvgpu_log(g, gpu_dbg_gr, "VEID0 subctx not available"); + return NULL; + } + + mappings = nvgpu_gr_subctx_get_mappings(subctx->gr_subctx); + if (mappings == NULL) { + nvgpu_log(g, gpu_dbg_gr, "VEID0 mappings not available"); + return NULL; + } + + return mappings; +} + +void nvgpu_tsg_subctxs_set_preemption_buffer_va( + struct nvgpu_tsg_subctx *tsg_subctx) +{ + struct nvgpu_gr_ctx_mappings *veid0_mappings; + struct nvgpu_tsg_subctx *subctx = NULL; + struct nvgpu_tsg *tsg = tsg_subctx->tsg; + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + + veid0_mappings = nvgpu_tsg_subctx_get_veid0_mappings(g, tsg); + if (veid0_mappings == NULL) { + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + return; + } + + nvgpu_list_for_each_entry(subctx, &tsg->subctx_list, + nvgpu_tsg_subctx, tsg_entry) { + if (subctx->gr_subctx != NULL) { + nvgpu_gr_subctx_set_preemption_buffer_va(g, + subctx->gr_subctx, veid0_mappings); + } + } + + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +void nvgpu_tsg_subctxs_clear_preemption_buffer_va( + struct nvgpu_tsg_subctx *tsg_subctx) +{ + struct nvgpu_tsg_subctx *subctx = NULL; + struct nvgpu_tsg *tsg = tsg_subctx->tsg; + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_list_for_each_entry(subctx, &tsg->subctx_list, + nvgpu_tsg_subctx, tsg_entry) { + if (subctx->gr_subctx != NULL) { + nvgpu_gr_subctx_clear_preemption_buffer_va(g, + subctx->gr_subctx); + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); +} +#endif /* CONFIG_NVGPU_GFXP */ + +#ifdef CONFIG_NVGPU_DEBUGGER +void nvgpu_tsg_subctxs_set_pm_buffer_va(struct nvgpu_tsg *tsg, + bool set_pm_ctx_gpu_va) +{ + struct nvgpu_tsg_subctx *subctx = NULL; + struct gk20a *g = tsg->g; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_list_for_each_entry(subctx, &tsg->subctx_list, + nvgpu_tsg_subctx, tsg_entry) { + if 
(subctx->gr_subctx != NULL) { + nvgpu_gr_subctx_set_hwpm_ptr(g, subctx->gr_subctx, + set_pm_ctx_gpu_va); + } + } + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} +#endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h new file mode 100644 index 000000000..ed6376148 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/tsg_subctx_priv.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_COMMON_FIFO_TSG_SUBCTX_PRIV_H +#define NVGPU_COMMON_FIFO_TSG_SUBCTX_PRIV_H + +#include +#include + +struct nvgpu_tsg; +struct vm_gk20a; +struct nvgpu_gr_subctx; + +struct nvgpu_tsg_subctx { + + /** Subcontext Id (aka. veid). */ + u32 subctx_id; + + /** TSG to which this subcontext belongs. */ + struct nvgpu_tsg *tsg; + + /** Subcontext's address space. */ + struct vm_gk20a *vm; + + /** Subcontext's GR ctx header and GR ctx buffers mappings. */ + struct nvgpu_gr_subctx *gr_subctx; + + /** + * Subcontext's entry in TSG's (#nvgpu_tsg) subcontexts list + * #subctx_list. + */ + struct nvgpu_list_node tsg_entry; + + /** + * List of channels (#nvgpu_channel) bound to this TSG subcontext. + * Accessed by holding #ch_list_lock from TSG. 
+ */ + struct nvgpu_list_node ch_list; +}; + +#endif /* NVGPU_COMMON_FIFO_TSG_SUBCTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index 541e5a234..5616d5432 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "common/gr/ctx_priv.h" @@ -116,9 +117,7 @@ int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, } #endif - if (desc->size[i] != 0U) { - nvgpu_assert(!nvgpu_mem_is_valid(&ctx->mem[i])); - + if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) { err = nvgpu_dma_alloc_sys(g, desc->size[i], &ctx->mem[i]); if (err != 0) { @@ -126,10 +125,14 @@ int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, nvgpu_gr_ctx_free_ctx_buffers(g, ctx); return err; } + + nvgpu_log(g, gpu_dbg_gr, "ctx buffer %u allocated", i); } } - ctx->ctx_id_valid = false; + if (!nvgpu_gr_ctx_get_ctx_initialized(ctx)) { + ctx->ctx_id_valid = false; + } nvgpu_log(g, gpu_dbg_gr, "done"); @@ -206,6 +209,8 @@ int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g, nvgpu_gr_ctx_free_ctx_preemption_buffers(g, ctx); return err; } + + nvgpu_log(g, gpu_dbg_gr, "ctx preemption buffer %u allocated", i); } } @@ -219,32 +224,54 @@ void nvgpu_gr_ctx_free(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer) { + struct nvgpu_tsg *tsg; + nvgpu_log(g, gpu_dbg_gr, " "); - if ((gr_ctx != NULL) && (gr_ctx->mappings != NULL)) { - nvgpu_gr_ctx_unmap_buffers(g, - gr_ctx, global_ctx_buffer, gr_ctx->mappings); + if (gr_ctx != NULL) { + tsg = nvgpu_tsg_get_from_id(g, gr_ctx->tsgid); - nvgpu_gr_ctx_free_mappings(g, gr_ctx); + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + nvgpu_assert(nvgpu_list_empty(&tsg->ch_list)); + nvgpu_assert(nvgpu_list_empty(&tsg->subctx_list)); + nvgpu_assert(nvgpu_list_empty(&tsg->gr_ctx_mappings_list)); + } else { + if (gr_ctx->mappings != NULL) { + nvgpu_gr_ctx_unmap_buffers(g, + gr_ctx, NULL, global_ctx_buffer, + gr_ctx->mappings); + + nvgpu_gr_ctx_free_mappings(g, gr_ctx); + } + } nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx); (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); + + nvgpu_mutex_release(&tsg->ctx_init_lock); } nvgpu_log(g, gpu_dbg_gr, "done"); } struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g, - struct nvgpu_tsg *tsg, struct vm_gk20a *vm) + struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) { struct nvgpu_gr_ctx_mappings *mappings = NULL; struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct vm_gk20a *vm = ch->vm; nvgpu_log(g, gpu_dbg_gr, " "); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return nvgpu_tsg_subctx_alloc_or_get_mappings(g, tsg, ch); + } + mappings = gr_ctx->mappings; if (mappings != NULL) { return mappings; @@ -278,11 +305,16 @@ void nvgpu_gr_ctx_free_mappings(struct gk20a *g, nvgpu_log(g, gpu_dbg_gr, "done"); } -struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg) +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) { - struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct gk20a *g = tsg->g; - return gr_ctx->mappings; + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return nvgpu_gr_ctx_mappings_get_subctx_mappings(g, tsg, ch->vm); + } + + return tsg->gr_ctx->mappings; } void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, @@ 
-639,9 +671,9 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_mappings *mappings) + struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PREEMPT_CTXSW); @@ -744,12 +776,6 @@ int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g, return 0; } - mappings = nvgpu_gr_ctx_get_mappings(tsg); - if (mappings == NULL) { - nvgpu_err(g, "gr_ctx mappings struct not allocated"); - return -ENOMEM; - } - nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PM_CTX, nvgpu_gr_hwpm_map_get_size(hwpm_map)); @@ -761,8 +787,25 @@ int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g, return ret; } - ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx, - NVGPU_GR_CTX_PM_CTX, mappings); + /* + * Commit NVGPU_GR_CTX_PM_CTX gpu va for all subcontexts + * when subcontexts are enabled. + */ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + ret = nvgpu_gr_ctx_mappings_map_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PM_CTX); + } else { + mappings = nvgpu_gr_ctx_get_mappings(tsg, NULL); + if (mappings == NULL) { + nvgpu_err(g, "gr_ctx mappings struct not allocated"); + nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx); + return -ENOMEM; + } + + ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx, + NVGPU_GR_CTX_PM_CTX, mappings); + } + if (ret != 0) { nvgpu_err(g, "gr_ctx pm_ctx buffer map failed %d", ret); nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx); @@ -839,13 +882,13 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update) + u32 mode, bool *set_pm_ctx_gpu_va, bool *skip_update) { - struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; int ret = 0; + *set_pm_ctx_gpu_va = false; *skip_update = false; if (!nvgpu_mem_is_valid(mem)) { @@ -868,8 +911,7 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, return 0; } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); - *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, - NVGPU_GR_CTX_PM_CTX); + *set_pm_ctx_gpu_va = true; break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW: if (pm_ctx->pm_mode == @@ -879,7 +921,7 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); - *pm_ctx_gpu_va = 0; + *set_pm_ctx_gpu_va = false; break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: if (pm_ctx->pm_mode == @@ -889,8 +931,7 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); - *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, - NVGPU_GR_CTX_PM_CTX); + *set_pm_ctx_gpu_va = true; break; default: nvgpu_err(g, "invalid hwpm context switch mode"); @@ -909,9 +950,16 @@ void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) } void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u64 pm_ctx_gpu_va) + bool set_pm_ctx_gpu_va) { struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + u64 pm_ctx_gpu_va = 0ULL; + + if (set_pm_ctx_gpu_va) { + pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va( + 
gr_ctx->mappings, + NVGPU_GR_CTX_PM_CTX); + } g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, pm_ctx_gpu_va); } @@ -921,3 +969,47 @@ void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped) ctx->pm_ctx.mapped = mapped; } #endif /* CONFIG_NVGPU_DEBUGGER */ + +bool nvgpu_gr_obj_ctx_global_ctx_buffers_patched(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->global_ctx_buffers_patched; +} + +void nvgpu_gr_obj_ctx_set_global_ctx_buffers_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched) +{ + gr_ctx->global_ctx_buffers_patched = patched; +} + +bool nvgpu_gr_obj_ctx_preempt_buffers_patched(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->preempt_buffers_patched; +} + +void nvgpu_gr_obj_ctx_set_preempt_buffers_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched) +{ + gr_ctx->preempt_buffers_patched = patched; +} + +bool nvgpu_gr_obj_ctx_default_compute_regs_patched(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->default_compute_regs_patched; +} + +void nvgpu_gr_obj_ctx_set_default_compute_regs_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched) +{ + gr_ctx->default_compute_regs_patched = patched; +} + +bool nvgpu_gr_obj_ctx_default_gfx_regs_patched(struct nvgpu_gr_ctx *gr_ctx) +{ + return gr_ctx->default_gfx_regs_patched; +} + +void nvgpu_gr_obj_ctx_set_default_gfx_regs_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched) +{ + gr_ctx->default_gfx_regs_patched = patched; +} diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings.c b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c index 541066a11..cfa60afe5 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx_mappings.c +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c @@ -22,17 +22,38 @@ #include #include +#include +#include #include #include +#include #include #include #include #include #include #include +#include +#include +#include #include #include "common/gr/ctx_mappings_priv.h" +#include "common/gr/subctx_priv.h" + +static inline struct nvgpu_gr_ctx_mappings * +nvgpu_gr_ctx_mappings_from_tsg_entry(struct nvgpu_list_node *node) +{ + return (struct nvgpu_gr_ctx_mappings *) + ((uintptr_t)node - offsetof(struct nvgpu_gr_ctx_mappings, tsg_entry)); +}; + +static inline struct nvgpu_gr_subctx * +nvgpu_gr_subctx_from_gr_ctx_mappings_entry(struct nvgpu_list_node *node) +{ + return (struct nvgpu_gr_subctx *) + ((uintptr_t)node - offsetof(struct nvgpu_gr_subctx, gr_ctx_mappings_entry)); +}; struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g, struct nvgpu_tsg *tsg, struct vm_gk20a *vm) @@ -83,11 +104,14 @@ int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, nvgpu_log(g, gpu_dbg_gr, " "); + if (mappings->ctx_buffer_va[index] != 0ULL) { + nvgpu_log_info(g, "buffer %u already mapped", index); + return 0; + } + mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index); mapping_flags = nvgpu_gr_ctx_get_ctx_mapping_flags(ctx, index); - nvgpu_assert(mappings->ctx_buffer_va[index] == 0ULL); - if (nvgpu_mem_is_valid(mem)) { gpu_va = nvgpu_gmmu_map(vm, mem, @@ -138,28 +162,149 @@ static void nvgpu_gr_ctx_mappings_unmap_ctx_buffer(struct nvgpu_gr_ctx *ctx, } } +static void nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx( + struct nvgpu_tsg *tsg, u32 index) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + + nvgpu_list_for_each_entry(mappings, &tsg->gr_ctx_mappings_list, + nvgpu_gr_ctx_mappings, tsg_entry) { + nvgpu_gr_ctx_mappings_unmap_ctx_buffer(gr_ctx, + index, mappings); + } +} + +int nvgpu_gr_ctx_mappings_map_buffer_all_subctx( + struct nvgpu_tsg *tsg, 
u32 index) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct gk20a *g = tsg->g; + int err; + + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + + nvgpu_list_for_each_entry(mappings, &tsg->gr_ctx_mappings_list, + nvgpu_gr_ctx_mappings, tsg_entry) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx, + index, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", index, err); + nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx(tsg, index); + return err; + } + + } + + return 0; +} + static void nvgpu_gr_ctx_mappings_unmap_ctx_buffers(struct nvgpu_gr_ctx *ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u32 buffers_count = NVGPU_GR_CTX_COUNT; u32 i; +#ifdef CONFIG_NVGPU_GFXP + struct nvgpu_tsg *tsg = mappings->tsg; + struct gk20a *g = tsg->g; + bool is_sync_veid; + bool gfxp_active; +#endif - for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + (void) subctx; + +#ifdef CONFIG_NVGPU_GFXP + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + is_sync_veid = nvgpu_tsg_subctx_get_id(subctx) == + CHANNEL_INFO_VEID0; + gfxp_active = (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) == + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP); + + if (is_sync_veid && gfxp_active) { + nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PREEMPT_CTXSW); + nvgpu_tsg_subctxs_clear_preemption_buffer_va(subctx); + nvgpu_gr_ctx_init_graphics_preemption_mode(ctx, + NVGPU_PREEMPTION_MODE_GRAPHICS_WFI); + } + + if (!is_sync_veid) { + if (gfxp_active) { + nvgpu_gr_subctx_clear_preemption_buffer_va(g, + nvgpu_tsg_subctx_get_gr_subctx(subctx)); + buffers_count = NVGPU_GR_CTX_PREEMPT_CTXSW + 1U; + } else { + buffers_count = NVGPU_GR_CTX_PATCH_CTX + 1U; + } + } + } +#endif + + for (i = 0; i < buffers_count; i++) { nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); } } static int nvgpu_gr_ctx_mappings_map_ctx_buffers(struct gk20a *g, struct nvgpu_gr_ctx *ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u32 buffers_count = NVGPU_GR_CTX_COUNT; int err = 0; u32 i; +#ifdef CONFIG_NVGPU_GFXP + struct nvgpu_tsg *tsg = mappings->tsg; + bool is_sync_veid; + bool gfxp_active; +#endif - for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + (void) subctx; + +#ifdef CONFIG_NVGPU_GFXP + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + is_sync_veid = nvgpu_tsg_subctx_get_id(subctx) == + CHANNEL_INFO_VEID0; + gfxp_active = (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) == + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP); + + if (is_sync_veid && gfxp_active) { + err = nvgpu_gr_ctx_mappings_map_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PREEMPT_CTXSW); + if (err != 0) { + nvgpu_err(g, "preempt buffer mapping failed %d", + err); + nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx( + tsg, NVGPU_GR_CTX_PREEMPT_CTXSW); + return err; + } + } + + /* + * Only NVGPU_GR_CTX_PREEMPT_CTXSW is to be mapped for + * all VEIDs. + * Don't map other preemption buffers for ASYNC VEIDs. 
+ */ + if (!is_sync_veid) { + if (gfxp_active) { + buffers_count = NVGPU_GR_CTX_PREEMPT_CTXSW + 1U; + } else { + buffers_count = NVGPU_GR_CTX_PATCH_CTX + 1U; + } + } + } +#endif + + for (i = 0; i < buffers_count; i++) { err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); if (err != 0) { nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); - nvgpu_gr_ctx_mappings_unmap_ctx_buffers(ctx, mappings); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(ctx, + subctx, mappings); return err; } } @@ -170,35 +315,96 @@ static int nvgpu_gr_ctx_mappings_map_ctx_buffers(struct gk20a *g, #ifdef CONFIG_NVGPU_GFXP static void nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers( struct nvgpu_gr_ctx *ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u32 buffers_count = NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; + struct nvgpu_tsg *tsg = mappings->tsg; + struct gk20a *g = tsg->g; + bool is_sync_veid; + bool gfxp_active; u32 i; - for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; - i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + is_sync_veid = nvgpu_tsg_subctx_get_id(subctx) == + CHANNEL_INFO_VEID0; + gfxp_active = (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) == + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP); + + if (is_sync_veid && gfxp_active) { + nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PREEMPT_CTXSW); + + nvgpu_rwsem_down_read(&tsg->ch_list_lock); + nvgpu_tsg_subctxs_clear_preemption_buffer_va(subctx); + nvgpu_rwsem_up_read(&tsg->ch_list_lock); + + nvgpu_gr_ctx_init_graphics_preemption_mode(ctx, + NVGPU_PREEMPTION_MODE_GRAPHICS_WFI); + } + + if (!is_sync_veid) { + if (gfxp_active) { + nvgpu_gr_subctx_clear_preemption_buffer_va(g, + nvgpu_tsg_subctx_get_gr_subctx(subctx)); + buffers_count = NVGPU_GR_CTX_PREEMPT_CTXSW; + } else { + return; + } + } + } + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; i <= buffers_count; i++) { nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); } } int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, struct nvgpu_gr_ctx *ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u32 buffers_count = NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; + struct nvgpu_tsg *tsg = mappings->tsg; + bool is_sync_veid; + bool gfxp_active; int err = 0; u32 i; - nvgpu_log(g, gpu_dbg_gr, " "); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + is_sync_veid = nvgpu_tsg_subctx_get_id(subctx) == + CHANNEL_INFO_VEID0; + gfxp_active = (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) == + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP); - for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; - i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { - if (mappings->ctx_buffer_va[i] == 0ULL) { - err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (is_sync_veid && gfxp_active) { + err = nvgpu_gr_ctx_mappings_map_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PREEMPT_CTXSW); if (err != 0) { - nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); - nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(ctx, mappings); + nvgpu_err(g, "preempt buffer mapping failed %d", err); + nvgpu_gr_ctx_mappings_unmap_buffer_all_subctx(tsg, + NVGPU_GR_CTX_PREEMPT_CTXSW); return err; } } + + if (!is_sync_veid) { + if (gfxp_active) { + buffers_count = NVGPU_GR_CTX_PREEMPT_CTXSW; + } else { + return 0; + } + } + } + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; i <= buffers_count; i++) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); + 
nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(ctx, + subctx, mappings); + return err; + } } nvgpu_log(g, gpu_dbg_gr, "done"); @@ -273,7 +479,9 @@ static void nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers( static int nvgpu_gr_ctx_mappings_map_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct nvgpu_gr_ctx_mappings *mappings, bool vpr) + struct nvgpu_tsg_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings, + bool vpr) { int err; @@ -282,7 +490,7 @@ static int nvgpu_gr_ctx_mappings_map_global_ctx_buffers(struct gk20a *g, * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB * if 2D/3D/I2M classes(graphics) are supported. */ - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_gr_obj_ctx_is_gfx_engine(g, subctx)) { /* Circular Buffer */ err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( global_ctx_buffer, @@ -388,7 +596,7 @@ fail: } int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_ctx_mappings *mappings, bool vpr) @@ -403,17 +611,17 @@ int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, return -EINVAL; } - err = nvgpu_gr_ctx_mappings_map_ctx_buffers(g, gr_ctx, mappings); + err = nvgpu_gr_ctx_mappings_map_ctx_buffers(g, gr_ctx, subctx, mappings); if (err != 0) { nvgpu_err(g, "fail to map ctx buffers"); return err; } err = nvgpu_gr_ctx_mappings_map_global_ctx_buffers(g, - global_ctx_buffer, mappings, vpr); + global_ctx_buffer, subctx, mappings, vpr); if (err != 0) { nvgpu_err(g, "fail to map global ctx buffer"); - nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, subctx, mappings); return err; } @@ -424,6 +632,7 @@ int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_ctx_mappings *mappings) { @@ -432,7 +641,7 @@ void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g, nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(global_ctx_buffer, mappings); - nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, subctx, mappings); nvgpu_log(g, gpu_dbg_gr, "done"); } @@ -450,3 +659,118 @@ u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, nvgpu_assert(index < NVGPU_GR_CTX_COUNT); return mappings->ctx_buffer_va[index]; } + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_get_subctx_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct vm_gk20a *vm) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_list_for_each_entry(mappings, &tsg->gr_ctx_mappings_list, + nvgpu_gr_ctx_mappings, tsg_entry) { + if (mappings->vm == vm) { + return mappings; + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return NULL; +} + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create_subctx_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct vm_gk20a *vm) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, " "); + + mappings = (struct nvgpu_gr_ctx_mappings *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx_mappings)); + if (mappings == NULL) { + nvgpu_err(g, "failed to alloc mappings"); + return NULL; + } + + nvgpu_vm_get(vm); + mappings->tsg = tsg; + 
mappings->vm = vm; + + nvgpu_init_list_node(&mappings->tsg_entry); + nvgpu_init_list_node(&mappings->subctx_list); + + /* add mappings to the list in the tsg */ + nvgpu_list_add_tail(&mappings->tsg_entry, + &tsg->gr_ctx_mappings_list); + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +void nvgpu_gr_ctx_mappings_add_gr_subctx(struct nvgpu_gr_ctx_mappings *mappings, + struct nvgpu_gr_subctx *subctx) +{ + struct nvgpu_gr_subctx *subctx_iter = NULL; + struct nvgpu_tsg *tsg = mappings->tsg; + struct gk20a *g = tsg->g; + bool found = false; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_list_for_each_entry(subctx_iter, &mappings->subctx_list, + nvgpu_gr_subctx, gr_ctx_mappings_entry) { + if (subctx_iter == subctx) { + found = true; + goto out; + } + } + +out: + if (!found) { + subctx->mappings = mappings; + nvgpu_list_add_tail(&subctx->gr_ctx_mappings_entry, + &mappings->subctx_list); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +void nvgpu_gr_ctx_mappings_free_subctx_mappings(struct nvgpu_tsg_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings, bool unmap) +{ + struct nvgpu_tsg *tsg = mappings->tsg; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct gk20a *g = tsg->g; + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer = + nvgpu_gr_get_global_ctx_buffer_ptr(g); + bool is_sync_veid; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (nvgpu_list_empty(&mappings->subctx_list)) { + if (unmap) { + nvgpu_gr_ctx_unmap_buffers(g, + gr_ctx, subctx, global_ctx_buffer, mappings); + } + + /* remove mappings from the list in the tsg */ + nvgpu_list_del(&mappings->tsg_entry); + + nvgpu_gr_ctx_mappings_free(g, mappings); + } + + is_sync_veid = nvgpu_tsg_subctx_get_id(subctx) == CHANNEL_INFO_VEID0; + + if (is_sync_veid) { + nvgpu_gr_obj_ctx_set_global_ctx_buffers_patched(gr_ctx, false); + nvgpu_gr_obj_ctx_set_preempt_buffers_patched(gr_ctx, false); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); +} diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h index 34b3e6722..fe857d830 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h @@ -53,5 +53,16 @@ struct nvgpu_gr_ctx_mappings { * corresponding to GPU virtual addresses above. */ u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; + + /** + * GR ctx mappings' entry in TSG's (#nvgpu_tsg) mappings list + * #gr_ctx_mappings_list. + */ + struct nvgpu_list_node tsg_entry; + + /** + * List of GR subcontexts (#nvgpu_gr_subctx) using this mapping. 
+ */ + struct nvgpu_list_node subctx_list; }; #endif /* NVGPU_GR_CTX_MAPPINGS_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_priv.h index 887fe4731..6ef818470 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/ctx_priv.h @@ -160,6 +160,11 @@ struct nvgpu_gr_ctx { */ u32 sm_diversity_config; #endif + + bool global_ctx_buffers_patched; + bool preempt_buffers_patched; + bool default_compute_regs_patched; + bool default_gfx_regs_patched; }; #endif /* NVGPU_GR_CTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c index 3b0f0daac..6c5c9005d 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_setup.c +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "gr_priv.h" @@ -140,22 +141,6 @@ static int nvgpu_gr_setup_validate_channel_and_class(struct gk20a *g, return err; } -static int nvgpu_gr_setup_alloc_subctx(struct gk20a *g, struct nvgpu_channel *c) -{ - int err = 0; - - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { - if (c->subctx == NULL) { - c->subctx = nvgpu_gr_subctx_alloc(g, c->vm); - if (c->subctx == NULL) { - err = -ENOMEM; - } - } - } - - return err; -} - int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags) { @@ -165,6 +150,9 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, int err = 0; struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); struct nvgpu_gr_ctx_mappings *mappings = NULL; +#ifdef CONFIG_NVGPU_FECS_TRACE + struct nvgpu_gr_subctx *gr_subctx = NULL; +#endif nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "GR%u: allocate object context for channel %u", @@ -195,54 +183,53 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, return -EINVAL; } - err = nvgpu_gr_setup_alloc_subctx(g, c); + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + err = nvgpu_tsg_subctx_alloc_gr_subctx(g, c); if (err != 0) { - nvgpu_err(g, "failed to allocate gr subctx buffer"); + nvgpu_err(g, "failed to alloc gr subctx"); + nvgpu_mutex_release(&tsg->ctx_init_lock); goto out; } - nvgpu_mutex_acquire(&tsg->ctx_init_lock); + err = nvgpu_tsg_subctx_setup_subctx_header(g, c); + if (err != 0) { + nvgpu_err(g, "failed to setup subctx header"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + goto out; + } gr_ctx = tsg->gr_ctx; - mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm); + mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c); if (mappings == NULL) { nvgpu_err(g, "fail to allocate/get ctx mappings struct"); nvgpu_mutex_release(&tsg->ctx_init_lock); goto out; } - if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx, - NVGPU_GR_CTX_CTX))) { - tsg->vm = c->vm; - nvgpu_vm_get(tsg->vm); - - err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, - gr->global_ctx_buffer, gr->gr_ctx_desc, - gr->config, gr_ctx, c->subctx, - mappings, &c->inst_block, class_num, flags, - c->cde, c->vpr); - if (err != 0) { - nvgpu_err(g, - "failed to allocate gr ctx buffer"); - nvgpu_gr_ctx_free_mappings(g, gr_ctx); - nvgpu_mutex_release(&tsg->ctx_init_lock); - nvgpu_vm_put(tsg->vm); - tsg->vm = NULL; - goto out; - } - - nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid); - } else { - /* commit gr ctx buffer */ - nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx, - c->subctx, mappings); + err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, + gr->global_ctx_buffer, gr->gr_ctx_desc, + gr->config, gr_ctx, c->subctx, + mappings, &c->inst_block, class_num, flags, + c->cde, 
c->vpr); + if (err != 0) { + nvgpu_err(g, + "failed to allocate gr ctx buffer"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + goto out; } + nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid); + #ifdef CONFIG_NVGPU_FECS_TRACE if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + gr_subctx = nvgpu_tsg_subctx_get_gr_subctx(c->subctx); + } + err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block, - c->subctx, gr_ctx, mappings, tsg->tgid, 0); + gr_subctx, gr_ctx, mappings, tsg->tgid, 0); if (err != 0) { nvgpu_warn(g, "fail to bind channel for ctxsw trace"); @@ -274,11 +261,6 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; out: - if (c->subctx != NULL) { - nvgpu_gr_subctx_free(g, c->subctx, c->vm); - c->subctx = NULL; - } - /* 1. gr_ctx, patch_ctx and global ctx buffer mapping can be reused so no need to release them. 2. golden image init and load is a one time thing so if @@ -320,13 +302,12 @@ void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c) return; } - if (c->subctx != NULL) { - nvgpu_gr_subctx_free(c->g, c->subctx, c->vm); - c->subctx = NULL; - } + nvgpu_gr_subctx_free(c->g, c->subctx, c->vm, true); + + nvgpu_log_fn(c->g, "done"); } -static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, +bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, u32 *compute_preempt_mode, struct nvgpu_gr_ctx *gr_ctx) { @@ -383,9 +364,19 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, gr_ctx = tsg->gr_ctx; + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + + g->ops.tsg.disable(tsg); + + err = nvgpu_preempt_channel(g, ch); + if (err != 0) { + nvgpu_err(g, "failed to preempt channel/TSG"); + goto enable_ch; + } + if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, &compute_preempt_mode, gr_ctx) == false) { - return 0; + goto enable_ch; } nvgpu_log(g, gpu_dbg_gr | gpu_dbg_sched, "chid=%d tsgid=%d pid=%d " @@ -398,13 +389,14 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); - return err; + goto enable_ch; } - mappings = nvgpu_gr_ctx_get_mappings(tsg); + mappings = nvgpu_gr_ctx_get_mappings(tsg, ch); if (mappings == NULL) { nvgpu_err(g, "failed to get gr_ctx mappings"); - return -EINVAL; + err = -EINVAL; + goto enable_ch; } #ifdef CONFIG_NVGPU_GFXP @@ -412,29 +404,21 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, gr->gr_ctx_desc, gr_ctx); if (err != 0) { nvgpu_err(g, "fail to allocate ctx preemption buffers"); - return err; + goto enable_ch; } err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g, - gr_ctx, mappings); + gr_ctx, ch->subctx, mappings); if (err != 0) { nvgpu_err(g, "fail to map ctx preemption buffers"); - return err; - } - #endif - - g->ops.tsg.disable(tsg); - - err = nvgpu_preempt_channel(g, ch); - if (err != 0) { - nvgpu_err(g, "failed to preempt channel/TSG"); goto enable_ch; } + #endif nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx, ch->subctx, mappings); - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_gr_obj_ctx_is_gfx_engine(g, ch->subctx)) { nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); g->ops.gr.init.commit_global_cb_manager(g, gr->config, gr_ctx, true); @@ -443,9 +427,12 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, g->ops.tsg.enable(tsg); + 
nvgpu_mutex_release(&tsg->ctx_init_lock); + return err; enable_ch: g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); return err; } diff --git a/drivers/gpu/nvgpu/common/gr/gr_utils.c b/drivers/gpu/nvgpu/common/gr/gr_utils.c index 6239b3f29..85a27a941 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_utils.c +++ b/drivers/gpu/nvgpu/common/gr/gr_utils.c @@ -85,14 +85,12 @@ struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g) } #endif -#ifdef CONFIG_NVGPU_FECS_TRACE struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr( struct gk20a *g) { struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); return gr->global_ctx_buffer; } -#endif #ifdef CONFIG_NVGPU_CILP u32 nvgpu_gr_get_cilp_preempt_pending_chid(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index 3f4206307..ebed24c31 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -31,10 +31,13 @@ #endif #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -54,16 +57,46 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, g->ops.ramin.set_gr_ptr(g, inst_block, gpu_va); } +#ifdef CONFIG_NVGPU_DEBUGGER +static void nvgpu_gr_obj_ctx_set_pm_ctx_gpu_va(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *tsg_subctx) +{ + struct nvgpu_gr_subctx *subctx; + bool set_pm_ctx_gpu_va; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + set_pm_ctx_gpu_va = nvgpu_gr_ctx_get_pm_ctx_pm_mode(gr_ctx) != + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + subctx = nvgpu_tsg_subctx_get_gr_subctx(tsg_subctx); + nvgpu_gr_subctx_set_hwpm_ptr(g, subctx, + set_pm_ctx_gpu_va); + } else { + nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, set_pm_ctx_gpu_va); + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); +} +#endif + void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *tsg_subctx, struct nvgpu_gr_ctx_mappings *mappings) { + struct nvgpu_gr_subctx *subctx; struct nvgpu_mem *ctxheader; u64 gpu_va; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); +#ifdef CONFIG_NVGPU_DEBUGGER + nvgpu_gr_obj_ctx_set_pm_ctx_gpu_va(g, gr_ctx, tsg_subctx); +#endif + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + subctx = nvgpu_tsg_subctx_get_gr_subctx(tsg_subctx); nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings); ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); @@ -78,7 +111,50 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, } #if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) -static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, +static void nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 class_num, u32 flags, + u32 *graphics_preempt_mode, u32 *compute_preempt_mode) +{ + u32 default_graphics_preempt_mode = 0U; + u32 default_compute_preempt_mode = 0U; + + g->ops.gr.init.get_default_preemption_modes( + &default_graphics_preempt_mode, + &default_compute_preempt_mode); + +#ifdef CONFIG_NVGPU_GFXP + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) { + *graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } + + if (g->ops.gpu_class.is_valid_gfx(class_num) && + nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) 
{ + *graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } +#endif + +#ifdef CONFIG_NVGPU_CILP + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) { + *compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } + + if (g->ops.gpu_class.is_valid_compute(class_num) && + nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) { + *compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } +#endif + + if (*compute_preempt_mode == 0U) { + *compute_preempt_mode = default_compute_preempt_mode; + } + + if (*graphics_preempt_mode == 0U) { + *graphics_preempt_mode = default_graphics_preempt_mode; + } +} + +static int nvgpu_gr_obj_ctx_init_ctxsw_preemption(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 flags) @@ -86,8 +162,6 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, int err; u32 graphics_preempt_mode = 0U; u32 compute_preempt_mode = 0U; - u32 default_graphics_preempt_mode = 0U; - u32 default_compute_preempt_mode = 0U; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); @@ -97,38 +171,26 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, return 0; } - g->ops.gr.init.get_default_preemption_modes( - &default_graphics_preempt_mode, - &default_compute_preempt_mode); - + if (nvgpu_gr_ctx_get_ctx_initialized(gr_ctx)) { #ifdef CONFIG_NVGPU_GFXP - if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) { - graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; - } - - if (g->ops.gpu_class.is_valid_gfx(class_num) && - nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) { - graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; - } + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) { + graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; + } #endif #ifdef CONFIG_NVGPU_CILP - if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) { - compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; - } - - if (g->ops.gpu_class.is_valid_compute(class_num) && - nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) { - compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; - } + if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) { + compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP; + } #endif - - if (compute_preempt_mode == 0U) { - compute_preempt_mode = default_compute_preempt_mode; - } - - if (graphics_preempt_mode == 0U) { - graphics_preempt_mode = default_graphics_preempt_mode; + if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, + &compute_preempt_mode, gr_ctx) == false) { + return 0; + } + } else { + nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, gr_ctx_desc, + class_num, flags, &graphics_preempt_mode, + &compute_preempt_mode); } err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, @@ -266,43 +328,15 @@ fail: return err; } -void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, +#ifdef CONFIG_NVGPU_GFXP +static void nvgpu_gr_obj_ctx_commit_veid0_preemption_buffers(struct gk20a *g, struct nvgpu_gr_config *config, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings) { -#ifdef CONFIG_NVGPU_GFXP u64 addr; u32 size; struct nvgpu_mem *mem; -#endif - - (void)config; - (void)subctx; - (void)mappings; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - - nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx); - -#ifdef CONFIG_NVGPU_GFXP - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) { - goto done; - } - - if 
(!nvgpu_mem_is_valid( - nvgpu_gr_ctx_get_ctx_mem(gr_ctx, - NVGPU_GR_CTX_PREEMPT_CTXSW))) { - goto done; - } - - if (subctx != NULL) { - nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, mappings); - } else { - nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx, mappings); - } - - nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_BETACB_CTXSW); g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings, @@ -324,6 +358,75 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, size = (u32)mem->size; g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true); +} +#endif + +bool nvgpu_gr_obj_ctx_is_gfx_engine(struct gk20a *g, struct nvgpu_tsg_subctx *subctx) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG) && + nvgpu_tsg_subctx_get_id(subctx) == CHANNEL_INFO_VEID0) { + return true; + } + } else if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + return true; + } + + return false; +} + +void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + (void)config; + (void)subctx; + (void)mappings; + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); + + nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx); + +#ifdef CONFIG_NVGPU_GFXP + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) { + goto done; + } + + if (!nvgpu_mem_is_valid( + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PREEMPT_CTXSW))) { + goto done; + } + + /* + * Commit NVGPU_GR_CTX_PREEMPT_CTXSW gpu va for all subcontexts + * considering VEID0 gpu va when subcontexts are enabled. + */ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + nvgpu_tsg_subctxs_set_preemption_buffer_va(subctx); + } else { + nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx); + } + + if (!nvgpu_gr_obj_ctx_is_gfx_engine(g, subctx)) { + goto done; + } + + if (nvgpu_gr_obj_ctx_preempt_buffers_patched(gr_ctx)) { + goto done; + } + + nvgpu_gr_obj_ctx_set_preempt_buffers_patched(gr_ctx, true); + + /* + * Commit other preemption buffers only for VEID0 when subcontexts are + * enabled. Commit always when subcontext are disabled. + */ + nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); + + nvgpu_gr_obj_ctx_commit_veid0_preemption_buffers(g, config, + gr_ctx, mappings); g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true); @@ -346,6 +449,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, bool patch) { @@ -363,7 +467,11 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, * Skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB * if 2D/3D/I2M classes(graphics) are not supported. 
*/ - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_gr_obj_ctx_is_gfx_engine(g, subctx)) { + if (patch && nvgpu_gr_obj_ctx_global_ctx_buffers_patched(gr_ctx)) { + goto commit_sm_id; + } + /* global pagepool buffer */ addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA); @@ -403,6 +511,16 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch); } #endif + + if (patch) { + nvgpu_gr_obj_ctx_set_global_ctx_buffers_patched(gr_ctx, + true); + } + } + +commit_sm_id: + if (patch && nvgpu_gr_ctx_get_ctx_initialized(gr_ctx)) { + goto out; } #ifdef CONFIG_NVGPU_SM_DIVERSITY @@ -427,6 +545,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, } #endif +out: if (patch) { nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false); } @@ -561,7 +680,7 @@ clean_up: static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_mappings *mappings) + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { int err = 0; struct netlist_av_list *sw_method_init = @@ -577,7 +696,7 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, g->ops.gr.init.fe_go_idle_timeout(g, false); nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, mappings, false); + config, gr_ctx, subctx, mappings, false); if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { /* override a few ctx state registers */ @@ -722,6 +841,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block) { @@ -745,13 +865,13 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, } err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, - config, gr_ctx, mappings); + config, gr_ctx, subctx, mappings); if (err != 0) { goto clean_up; } #ifdef CONFIG_NVGPU_GRAPHICS - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + if (nvgpu_gr_obj_ctx_is_gfx_engine(g, subctx)) { err = nvgpu_gr_ctx_init_zcull(g, gr_ctx); if (err != 0) { goto clean_up; @@ -832,14 +952,14 @@ static int nvgpu_gr_obj_ctx_alloc_buffers(struct gk20a *g, nvgpu_log(g, gpu_dbg_gr, " "); - nvgpu_gr_obj_ctx_gr_ctx_set_size(g, golden_image, gr_ctx_desc); - - nvgpu_gr_obj_ctx_patch_ctx_set_size(g, config, gr_ctx_desc); - - nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + if (!nvgpu_gr_ctx_get_ctx_initialized(gr_ctx)) { + nvgpu_gr_obj_ctx_gr_ctx_set_size(g, golden_image, gr_ctx_desc); + nvgpu_gr_obj_ctx_patch_ctx_set_size(g, config, gr_ctx_desc); + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + } #if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) - err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, + err = nvgpu_gr_obj_ctx_init_ctxsw_preemption(g, config, gr_ctx_desc, gr_ctx, class_num, flags); if (err != 0) { nvgpu_err(g, "fail to init preemption mode"); @@ -982,13 +1102,54 @@ out: return err; } +static int nvgpu_gr_obj_ctx_load_golden_image(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings, + struct nvgpu_mem 
*inst_block, + bool cde) +{ + int err; + + /* init golden image */ + err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, + global_ctx_buffer, config, gr_ctx, subctx, + mappings, inst_block); + if (err != 0) { + nvgpu_err(g, "fail to init golden ctx image"); + return err; + } + +#ifdef CONFIG_NVGPU_POWER_PG + /* Re-enable ELPG now that golden image has been initialized. + * The PMU PG init code may already have tried to enable elpg, but + * would not have been able to complete this action since the golden + * image hadn't been initialized yet, so do this now. + */ + err = nvgpu_pmu_reenable_elpg(g); + if (err != 0) { + nvgpu_err(g, "fail to re-enable elpg"); + return err; + } +#endif + + /* load golden image */ + nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, mappings, + golden_image->local_golden_image, cde); + + return 0; +} + int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, struct nvgpu_gr_obj_ctx_golden_image *golden_image, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_subctx *subctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block, u32 class_num, u32 flags, @@ -1005,9 +1166,11 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, goto out; } - nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx); + if (!nvgpu_gr_ctx_get_ctx_initialized(gr_ctx)) { + nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx); + } - err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, subctx, global_ctx_buffer, mappings, vpr); if (err != 0) { nvgpu_err(g, "failed to map ctx buffers"); @@ -1015,52 +1178,42 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, } nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, mappings, true); + config, gr_ctx, subctx, mappings, true); /* commit gr ctx buffer */ nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings); - /* init golden image */ - err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, - global_ctx_buffer, config, gr_ctx, mappings, inst_block); - if (err != 0) { - nvgpu_err(g, "fail to init golden ctx image"); - goto out; + if (!nvgpu_gr_ctx_get_ctx_initialized(gr_ctx)) { + err = nvgpu_gr_obj_ctx_load_golden_image(g, golden_image, + global_ctx_buffer, config, gr_ctx, subctx, + mappings, inst_block, cde); + if (err != 0) { + nvgpu_err(g, "fail to load golden ctx image"); + goto out; + } } -#ifdef CONFIG_NVGPU_POWER_PG - /* Re-enable ELPG now that golden image has been initialized. - * The PMU PG init code may already have tried to enable elpg, but - * would not have been able to complete this action since the golden - * image hadn't been initialized yet, so do this now. 
- */ - err = nvgpu_pmu_reenable_elpg(g); - if (err != 0) { - nvgpu_err(g, "fail to re-enable elpg"); - goto out; - } -#endif - - /* load golden image */ - nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, mappings, - golden_image->local_golden_image, cde); - nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, subctx, mappings); #ifndef CONFIG_NVGPU_NON_FUSA if (g->ops.gpu_class.is_valid_compute(class_num) && - g->ops.gr.init.set_default_compute_regs != NULL) { + (g->ops.gr.init.set_default_compute_regs != NULL) && + (!nvgpu_gr_obj_ctx_default_compute_regs_patched(gr_ctx))) { g->ops.gr.init.set_default_compute_regs(g, gr_ctx); + nvgpu_gr_obj_ctx_set_default_compute_regs_patched(gr_ctx, true); } - if (g->ops.ltc.set_default_l2_max_ways_evict_last != NULL) { + if ((g->ops.ltc.set_default_l2_max_ways_evict_last != NULL) && + (!nvgpu_gr_ctx_get_ctx_initialized(gr_ctx))) { g->ops.ltc.set_default_l2_max_ways_evict_last(g, gr_ctx); } #endif #ifdef CONFIG_NVGPU_NON_FUSA - if (g->ops.gr.init.enable_mme_config_ptimer != NULL) { + if ((g->ops.gr.init.enable_mme_config_ptimer != NULL) && + (!nvgpu_gr_ctx_get_ctx_initialized(gr_ctx))) { + err = nvgpu_pg_elpg_protected_call(g, g->ops.gr.init.enable_mme_config_ptimer(g, gr_ctx)); @@ -1076,8 +1229,10 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, * required for graphics contexts. */ if (g->ops.gpu_class.is_valid_gfx(class_num) && - g->ops.gr.init.set_default_gfx_regs != NULL) { + (g->ops.gr.init.set_default_gfx_regs != NULL) && + (!nvgpu_gr_obj_ctx_default_gfx_regs_patched(gr_ctx))) { g->ops.gr.init.set_default_gfx_regs(g, gr_ctx, &golden_image->gfx_regs); + nvgpu_gr_obj_ctx_set_default_gfx_regs_patched(gr_ctx, true); } nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index 6cc6d5773..09c930b4c 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -30,17 +31,16 @@ #include "common/gr/subctx_priv.h" -struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g, +int nvgpu_gr_subctx_setup_header(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct vm_gk20a *vm) { - struct nvgpu_gr_subctx *subctx; int err = 0; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_gr, " "); - subctx = nvgpu_kzalloc(g, sizeof(*subctx)); - if (subctx == NULL) { - return NULL; + if (subctx->ctx_header.gpu_va != 0ULL) { + return 0; } err = nvgpu_dma_alloc_sys(g, @@ -48,7 +48,7 @@ struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g, &subctx->ctx_header); if (err != 0) { nvgpu_err(g, "failed to allocate sub ctx header"); - goto err_free_subctx; + return err; } subctx->ctx_header.gpu_va = nvgpu_gmmu_map(vm, @@ -58,26 +58,65 @@ struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g, subctx->ctx_header.aperture); if (subctx->ctx_header.gpu_va == 0ULL) { nvgpu_err(g, "failed to map ctx header"); + err = -ENOMEM; goto err_free_ctx_header; } - return subctx; + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; err_free_ctx_header: nvgpu_dma_free(g, &subctx->ctx_header); -err_free_subctx: - nvgpu_kfree(g, subctx); - return NULL; + return err; +} + +struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g) +{ + struct nvgpu_gr_subctx *subctx; + + nvgpu_log(g, gpu_dbg_gr, " "); + + subctx = nvgpu_kzalloc(g, sizeof(*subctx)); + if (subctx == NULL) { + return NULL; + } + + nvgpu_init_list_node(&subctx->gr_ctx_mappings_entry); + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 
subctx; } void nvgpu_gr_subctx_free(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, - struct vm_gk20a *vm) + struct nvgpu_tsg_subctx *subctx, + struct vm_gk20a *vm, + bool unmap) { - nvgpu_log_fn(g, " "); + struct nvgpu_gr_subctx *gr_subctx = + nvgpu_tsg_subctx_get_gr_subctx(subctx); - nvgpu_dma_unmap_free(vm, &subctx->ctx_header); - nvgpu_kfree(g, subctx); + nvgpu_log(g, gpu_dbg_gr, " "); + + if (gr_subctx == NULL) { + return; + } + + if (gr_subctx->mappings != NULL) { + nvgpu_list_del(&gr_subctx->gr_ctx_mappings_entry); + nvgpu_gr_ctx_mappings_free_subctx_mappings(subctx, + gr_subctx->mappings, unmap); + gr_subctx->mappings = NULL; + } + + if (unmap) { + nvgpu_dma_unmap_free(vm, &gr_subctx->ctx_header); + } + + nvgpu_kfree(g, gr_subctx); + + nvgpu_log(g, gpu_dbg_gr, "done"); } void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, @@ -100,11 +139,6 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader, nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PATCH_CTX)); -#ifdef CONFIG_NVGPU_DEBUGGER - g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader, - nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PM_CTX)); -#endif - #ifdef CONFIG_NVGPU_GRAPHICS g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader, nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx)); @@ -120,6 +154,16 @@ struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct nvgpu_gr_subctx *subctx) return &subctx->ctx_header; } +struct nvgpu_gr_ctx_mappings *nvgpu_gr_subctx_get_mappings( + struct nvgpu_gr_subctx *subctx) +{ + if (subctx == NULL) { + return NULL; + } + + return subctx->mappings; +} + #ifdef CONFIG_NVGPU_GRAPHICS void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) @@ -134,25 +178,59 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) + struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx_mappings *veid0_mappings) { - u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, - NVGPU_GR_CTX_PREEMPT_CTXSW); + u64 preempt_ctxsw_veid0_gpu_va; + u64 preempt_ctxsw_gpu_va; + struct nvgpu_mem *ctxheader; - g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header, + ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); + + preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va( + subctx->mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + + preempt_ctxsw_veid0_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va( + veid0_mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, ctxheader, preempt_ctxsw_gpu_va); if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, - &subctx->ctx_header, preempt_ctxsw_gpu_va); + ctxheader, preempt_ctxsw_veid0_gpu_va); + } +} + +void nvgpu_gr_subctx_clear_preemption_buffer_va(struct gk20a *g, + struct nvgpu_gr_subctx *subctx) +{ + struct nvgpu_mem *ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); + + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, ctxheader, 0ULL); + + if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { + g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, + ctxheader, 0ULL); } } #endif /* CONFIG_NVGPU_GFXP */ #ifdef CONFIG_NVGPU_DEBUGGER void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va) + struct nvgpu_gr_subctx 
*subctx, + bool set_pm_ctx_gpu_va) { + u64 pm_ctx_gpu_va = 0ULL; + + if (set_pm_ctx_gpu_va) { + pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va( + subctx->mappings, + NVGPU_GR_CTX_PM_CTX); + } + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header, pm_ctx_gpu_va); } diff --git a/drivers/gpu/nvgpu/common/gr/subctx_priv.h b/drivers/gpu/nvgpu/common/gr/subctx_priv.h index 5737aedd8..ff2e6dadd 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/subctx_priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,14 +27,23 @@ struct nvgpu_mem; /** * GR subcontext data structure. - * - * One subcontext is allocated per GPU channel. */ struct nvgpu_gr_subctx { /** * Memory to hold subcontext header image. */ struct nvgpu_mem ctx_header; + + /** + * GPU mappings of the GR ctx buffers for this subcontext. + */ + struct nvgpu_gr_ctx_mappings *mappings; + + /** + * GR subcontext's entry in gr ctx mappings' (#nvgpu_gr_ctx_mappings) + * subcontexts list #subctx_list. + */ + struct nvgpu_list_node gr_ctx_mappings_entry; }; #endif /* NVGPU_GR_SUBCTX_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/zcull.c b/drivers/gpu/nvgpu/common/gr/zcull.c index f745ea9dd..c374e2586 100644 --- a/drivers/gpu/nvgpu/common/gr/zcull.c +++ b/drivers/gpu/nvgpu/common/gr/zcull.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -159,15 +160,17 @@ int nvgpu_gr_zcull_init_hw(struct gk20a *g, return 0; } -int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, +int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_gr_subctx *gr_subctx; int ret = 0; - if (subctx != NULL) { + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + gr_subctx = nvgpu_tsg_subctx_get_gr_subctx(subctx); ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false); if (ret == 0) { - nvgpu_gr_subctx_zcull_setup(g, subctx, gr_ctx); + nvgpu_gr_subctx_zcull_setup(g, gr_subctx, gr_ctx); } } else { ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true); diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index c50e95720..7d5d127f7 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -50,6 +50,8 @@ #include #include +#include + #include "gr_vgpu.h" #include "ctx_vgpu.h" #include "subctx_vgpu.h" @@ -173,6 +175,7 @@ int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags) struct nvgpu_tsg *tsg = NULL; struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_alloc_obj_ctx_params *p = &msg.params.alloc_obj_ctx; + struct nvgpu_gr_ctx_mappings *mappings = NULL; int err = 0; nvgpu_log_fn(g, " "); @@ -211,11 +214,27 @@ int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags) gr_ctx = tsg->gr_ctx; nvgpu_mutex_acquire(&tsg->ctx_init_lock); - if (tsg->vm == NULL) { - tsg->vm = c->vm; - nvgpu_vm_get(tsg->vm); - gr_ctx->tsgid = tsg->tsgid; + + /* + * gr_subctx and mappings are allocated/setup here just to track the + * VM references. When a new mapping is created VM reference is taken. + * It will be dropped when the last channel in the subcontext is + * released. 
+ */ + err = nvgpu_tsg_subctx_alloc_gr_subctx(g, c); + if (err != 0) { + nvgpu_err(g, "failed to alloc gr subctx"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + return err; } + + mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c); + if (mappings == NULL) { + nvgpu_err(g, "fail to allocate/get ctx mappings struct"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + return -ENOMEM; + } + nvgpu_mutex_release(&tsg->ctx_init_lock); msg.cmd = TEGRA_VGPU_CMD_ALLOC_OBJ_CTX; @@ -234,6 +253,7 @@ int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags) err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); err = err ? err : msg.ret; if (err == 0) { + gr_ctx->tsgid = tsg->tsgid; nvgpu_gr_ctx_mark_ctx_initialized(gr_ctx); } else { nvgpu_err(g, "alloc obj ctx failed err %d", err); diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c index 850ee875d..ac47bf163 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/subctx_vgpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,35 +20,22 @@ * DEALINGS IN THE SOFTWARE. */ -#include -#include -#include +#include #include - -#include "common/gr/subctx_priv.h" +#include +#include #include "subctx_vgpu.h" -#include "common/vgpu/ivc/comm_vgpu.h" void vgpu_gr_setup_free_subctx(struct nvgpu_channel *c) { - struct tegra_vgpu_cmd_msg msg = {}; - struct tegra_vgpu_free_ctx_header_params *p = - &msg.params.free_ctx_header; - struct gk20a *g = c->g; - int err; + nvgpu_log(c->g, gpu_dbg_gr, " "); - msg.cmd = TEGRA_VGPU_CMD_FREE_CTX_HEADER; - msg.handle = vgpu_get_handle(g); - p->ch_handle = c->virt_ctx; - err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); - err = err ? 
err : msg.ret; - if (unlikely(err != 0)) { - nvgpu_err(g, "free ctx_header failed err %d", err); + if (!nvgpu_is_enabled(c->g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { + return; } - if (c->subctx != NULL) { - nvgpu_kfree(g, c->subctx); - c->subctx = NULL; - } + nvgpu_gr_subctx_free(c->g, c->subctx, c->vm, false); + + nvgpu_log(c->g, gpu_dbg_gr, "done"); } diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c index 4258f2ff7..fbbc6d968 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "gr_gk20a.h" #include "gr_pri_gk20a.h" @@ -82,15 +83,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct nvgpu_tsg *tsg, u32 mode) { - struct nvgpu_channel *ch; + bool set_pm_ctx_gpu_va = false; struct nvgpu_gr_ctx *gr_ctx; bool skip_update = false; - u64 pm_ctx_gpu_va = 0ULL; int ret; struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id); nvgpu_log_fn(g, " "); + nvgpu_mutex_acquire(&tsg->ctx_init_lock); + gr_ctx = tsg->gr_ctx; if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { @@ -99,6 +101,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, if (ret != 0) { nvgpu_err(g, "failed to allocate and map pm ctxt buffer"); + nvgpu_mutex_release(&tsg->ctx_init_lock); return ret; } @@ -109,11 +112,14 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, - &pm_ctx_gpu_va, &skip_update); + &set_pm_ctx_gpu_va, &skip_update); if (ret != 0) { + nvgpu_mutex_release(&tsg->ctx_init_lock); return ret; } + if (skip_update) { + nvgpu_mutex_release(&tsg->ctx_init_lock); return 0; } @@ -128,20 +134,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, nvgpu_gr_ctx_set_hwpm_pm_mode(g, gr_ctx); if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { - nvgpu_rwsem_down_read(&tsg->ch_list_lock); - nvgpu_list_for_each_entry(ch, &tsg->ch_list, - nvgpu_channel, ch_entry) { - nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, - pm_ctx_gpu_va); - } - nvgpu_rwsem_up_read(&tsg->ch_list_lock); + nvgpu_tsg_subctxs_set_pm_buffer_va(tsg, set_pm_ctx_gpu_va); } else { - nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_ctx_gpu_va); + nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, set_pm_ctx_gpu_va); } out: g->ops.tsg.enable(tsg); + nvgpu_mutex_release(&tsg->ctx_init_lock); + return ret; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 0243b44d9..573a2a5a8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -43,7 +43,6 @@ struct nvgpu_fence_type; struct nvgpu_swprofiler; struct nvgpu_channel_sync; struct nvgpu_gpfifo_userdata; -struct nvgpu_gr_subctx; struct nvgpu_gr_ctx; struct nvgpu_debug_context; struct priv_cmd_queue; @@ -363,6 +362,12 @@ struct nvgpu_channel { /** Channel's entry in TSG's channel list. */ struct nvgpu_list_node ch_entry; + /** + * Channel's entry in TSG Subcontext's (#nvgpu_tsg_subctx) channels list + * #ch_list. + */ + struct nvgpu_list_node subctx_entry; + #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_channel_joblist joblist; struct gpfifo_desc gpfifo; @@ -440,8 +445,8 @@ struct nvgpu_channel { u64 virt_ctx; #endif - /** Channel's graphics subcontext. */ - struct nvgpu_gr_subctx *subctx; + /** Channel's subcontext. */ + struct nvgpu_tsg_subctx *subctx; /** Lock to access unserviceable state. 
*/ struct nvgpu_spinlock unserviceable_lock; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 990c68cf6..f50e985f0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -42,6 +42,7 @@ struct gk20a; struct vm_gk20a; struct nvgpu_tsg; struct nvgpu_gr_ctx; +struct nvgpu_channel; struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_global_ctx_buffer_desc; struct nvgpu_gr_global_ctx_local_golden_image; @@ -470,30 +471,33 @@ void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, struct nvgpu_gr_ctx *ctx); /** - * @brief Allocate or get GR ctx buffers mappings for a TSG. + * @brief Allocate or get GR ctx buffers mappings for a TSG/Subcontext. * * @param g [in] Pointer to GPU driver struct. * @param tsg [in] Pointer to TSG struct. - * @param vm [in] Pointer to vm struct. + * @param ch [in] Pointer to Channel struct. * - * This function allocates the mappings struct for TSG corresponding to - * given vm if not available already else returns the same. + * This function allocates the mappings struct for TSG/subcontext corresponding + * to given Channel's VM if not available already else returns the same. * * @return mappings struct in case of success, null in case of failure. */ struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g, - struct nvgpu_tsg *tsg, struct vm_gk20a *vm); + struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); /** - * @brief Get GR ctx buffers mappings for a TSG. + * @brief Get GR ctx buffers mappings for a TSG or Subcontext corresponding to + * a channel. * * @param tsg [in] Pointer to TSG struct. + * @param ch [in] Pointer to Channel struct. * * This function returns the mappings struct for TSG. * * @return mappings struct. */ -struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg); +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch); /** * @brief Free the gr ctx mapping struct. 
@@ -564,8 +568,7 @@ bool nvgpu_gr_ctx_desc_force_preemption_cilp( #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_mappings *mappings); + struct nvgpu_gr_ctx *gr_ctx); bool nvgpu_gr_ctx_desc_force_preemption_gfxp( struct nvgpu_gr_ctx_desc *gr_ctx_desc); @@ -608,10 +611,10 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update); + u32 mode, bool *set_pm_ctx_gpu_va, bool *skip_update); void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u64 pm_ctx_gpu_va); + bool set_pm_ctx_gpu_va); void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped); #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING @@ -625,4 +628,17 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( struct nvgpu_gr_ctx_desc *gr_ctx_desc); #endif +bool nvgpu_gr_obj_ctx_global_ctx_buffers_patched(struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_obj_ctx_set_global_ctx_buffers_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched); +bool nvgpu_gr_obj_ctx_preempt_buffers_patched(struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_obj_ctx_set_preempt_buffers_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched); +bool nvgpu_gr_obj_ctx_default_compute_regs_patched(struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_obj_ctx_set_default_compute_regs_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched); +bool nvgpu_gr_obj_ctx_default_gfx_regs_patched(struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_obj_ctx_set_default_gfx_regs_patched( + struct nvgpu_gr_ctx *gr_ctx, bool patched); + #endif /* NVGPU_GR_CTX_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h index 0ce3e54a4..4f99f715b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h @@ -27,6 +27,7 @@ struct gk20a; struct nvgpu_tsg; struct vm_gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_subctx; struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_global_ctx_buffer_desc; @@ -78,15 +79,18 @@ int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, * * @param g [in] Pointer to GPU driver struct. * @param ctx [in] Pointer to GR context struct. + * @param subctx [in] Pointer to TSG subcontext struct. * @param mappings [in] Pointer to GR context buffer mappings struct. * * This function will map the GR context preemption buffers in #mappings->vm - * and stores the mapped address. + * and stores the mapped address. For subcontext case NVGPU_GR_CTX_PREEMPT_CTXSW + * buffer is mapped to all subcontexts. * * @return 0 in case of success, < 0 in case of failure. */ int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, struct nvgpu_gr_ctx *ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings); /** @@ -94,6 +98,7 @@ int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, * * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to GR context struct. + * @param subctx [in] Pointer to TSG subcontext struct. * @param global_ctx_buffer [in] Pointer global context buffer desc. * @param mappings [in] Pointer to GR context buffer * mappings struct. 
@@ -106,7 +111,7 @@ int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, * @return 0 in case of success, < 0 in case of failure. */ int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_ctx_mappings *mappings, bool vpr); @@ -116,6 +121,7 @@ int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, * * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to GR context struct. + * @param subctx [in] Pointer to TSG subcontext struct. * @param global_ctx_buffer [in] Pointer global context buffer desc. * @param mappings [in] Pointer to GR context buffer * mappings struct. @@ -124,6 +130,7 @@ int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, */ void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_ctx_mappings *mappings); @@ -157,4 +164,80 @@ u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappin u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, u32 index); +/** + * @brief Get GR ctx buffers mappings for a TSG corresponding to VM. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. + * + * This function retrieves the mappings struct for TSG corresponding to + * given vm from #tsg->gr_ctx_mappings_list. + * + * @return mappings struct in case of success, null in case of failure. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_get_subctx_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct vm_gk20a *vm); + +/** + * @brief Allocate GR ctx buffers mappings for a TSG corresponding to VM. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. + * + * This function allocates the mappings struct for TSG corresponding to + * given vm and inserts in #tsg->gr_ctx_mappings_list. + * + * @return mappings struct in case of success, null in case of failure. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create_subctx_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct vm_gk20a *vm); + +/** + * @brief Link GR subctx to mappings struct. + * + * @param mappings [in] Pointer to GR context buffers mappings struct. + * @param subctx [in] Pointer to GR subcontext struct. + * + * This function checks and inserts the subctx in #mappings->subctx_list. + */ +void nvgpu_gr_ctx_mappings_add_gr_subctx(struct nvgpu_gr_ctx_mappings *mappings, + struct nvgpu_gr_subctx *subctx); + +/** + * @brief Free GR context buffers mappings struct for subcontexts. + * + * @param subctx [in] Pointer to GR subcontext struct. + * @param mappings [in] Pointer to GR context buffers mappings struct. + * @param unmap [in] Indicates if the GR context buffers are to be + * unmapped. true in case of native nvgpu config, + * false in case of vgpu config. For vgpu case, + * this path is used to handle the VM references + * per subcontext. + * + * This function checks if the #mappings->subctx_list is empty and if empty, + * unmaps the buffers and deletes the mappings. 
+ */ +void nvgpu_gr_ctx_mappings_free_subctx_mappings(struct nvgpu_tsg_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings, bool unmap); + +/** + * @brief Map GR context buffer to all subcontext VMs. + * + * @param tsg [in] Pointer to tsg struct. + * @param index [in] Index of the buffer to be mapped. + * + * This function maps the GR context buffer at #index to all VMs listed + * in #tsg->gr_ctx_mappings_list. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_mappings_map_buffer_all_subctx( + struct nvgpu_tsg *tsg, u32 index); + #endif /* NVGPU_GR_CTX_MAPPINGS_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h index 6096fedd2..e85fe09ca 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_utils.h @@ -130,9 +130,7 @@ struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g); void nvgpu_gr_reset_falcon_ptr(struct gk20a *g); void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g); #endif -#ifdef CONFIG_NVGPU_FECS_TRACE struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr( struct gk20a *g); -#endif #endif /* NVGPU_GR_UTILS_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h index ee5d147b5..d5edab064 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h @@ -34,7 +34,7 @@ struct gk20a; struct nvgpu_gr_ctx; struct nvgpu_gr_ctx_mappings; -struct nvgpu_gr_subctx; +struct nvgpu_tsg_subctx; struct nvgpu_gr_config; struct nvgpu_gr_ctx_desc; struct vm_gk20a; @@ -70,7 +70,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, * @param g [in] Pointer to GPU driver struct. * @param inst_block [in] Pointer to channel instance block. * @param gr_ctx [in] Pointer to graphics context buffer. - * @param subctx [in] Pointer to graphics subcontext buffer. + * @param subctx [in] Pointer to TSG subcontext struct. * @param mappings [in] Pointer to mappings of the GR context buffers. * * If graphics subcontexts are supported, subcontext buffer GPU virtual @@ -82,9 +82,23 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, * instance block. */ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings); +/** + * brief Check if the VEID is sync when subcontexts are enabled. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx [in] Pointer to TSG subcontext struct. + * + * @retval true if subcontexts are enabled, MIG is disabled and \a subctx + * corresponds to VEID0. + * @retval true if subcontexts are disabled and MIG is disabled. + * @retval false otherwise. + */ +bool nvgpu_gr_obj_ctx_is_gfx_engine(struct gk20a *g, + struct nvgpu_tsg_subctx *subctx); + /** * brief Initialize preemption mode in context struct. * @@ -120,7 +134,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, * @param g [in] Pointer to GPU driver struct. * @param config [in] Pointer to GR configuration struct. * @param gr_ctx [in] Pointer to graphics context. - * @param subctx [in] Pointer to graphics subcontext buffer. + * @param subctx [in] Pointer to TSG subcontext struct. * @param mappings [in] Pointer to mappings of GR context buffers. 
* * This function will read preemption modes stored in #nvgpu_gr_ctx @@ -134,7 +148,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, */ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings); /** @@ -144,6 +158,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, * @param global_ctx_buffer [in] Pointer to global context descriptor struct. * @param config [in] Pointer to GR configuration struct. * @param gr_ctx [in] Pointer to graphics context. + * @param subctx [in] Pointer to TSG subcontext struct. * @param mappings [in] Pointer to mappings of GR context buffers. * @param patch [in] Boolean flag to use patch context buffer. * @@ -156,7 +171,8 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_mappings *mappings, bool patch); + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, + bool patch); /** * @brief Allocate and setup object context s/w image for VEID0 GPU channel. @@ -191,6 +207,7 @@ int nvgpu_gr_obj_ctx_init_golden_context_image(struct gk20a *g); * @param global_ctx_buffer [in] Pointer to global context descriptor struct. * @param config [in] Pointer to GR configuration struct. * @param gr_ctx [in] Pointer to graphics context. + * @param subctx [in] Pointer to TSG subcontext struct. * @param inst_block [in] Pointer to channel instance block. * * This function allocates golden context image. @@ -222,6 +239,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block); @@ -234,7 +252,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, * @param gr_ctx_desc [in] Pointer to GR context descriptor struct. * @param config [in] Pointer to GR configuration struct. * @param gr_ctx [in] Pointer to graphics context. - * @param subctx [in] Pointer to graphics subcontext buffer. + * @param subctx [in] Pointer to TSG subcontext struct. * @param mappings [in] Pointer to mappings of the GR context buffers. * @param inst_block [in] Pointer to channel instance block. * @param class_num [in] GR engine class. @@ -274,7 +292,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_subctx *subctx, + struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block, u32 class_num, u32 flags, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h index 4b6ed33d0..593ed6d89 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h @@ -106,6 +106,20 @@ void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, */ void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c); +/** + * @brief Validate preemption mode in GR engine context image in case + * Application optionally wants to change default preemption mode. 
+ * + * @param graphics_preempt_mode [in] Requested graphics preemption mode. + * @param compute_preempt_mode [in] Requested compute preemption mode. + * @param gr_ctx [in] Pointer to GR engine context image. + * + * @return true in case of success, false in case of failure. + */ +bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, + u32 *compute_preempt_mode, + struct nvgpu_gr_ctx *gr_ctx); + /** * @brief Setup preemption mode in GR engine context image in case * Application optionally wants to change default preemption mode. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h index 8739a165b..df2b98d26 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h @@ -32,41 +32,62 @@ */ struct gk20a; struct vm_gk20a; +struct nvgpu_gr_ctx; struct nvgpu_gr_subctx; struct nvgpu_mem; struct nvgpu_gr_ctx_mappings; /** - * @brief Allocate graphics subcontext buffer. - * - * @param g [in] Pointer to GPU driver struct. - * @param vm [in] Pointer to virtual memory. - * - * This function allocates memory for #nvgpu_gr_subctx structure - * and subcontext header stored in #nvgpu_gr_subctx structure. - * - * Subcontext header memory will be mapped to given virtual - * memory. - * - * @return pointer to #nvgpu_gr_subctx struct in case of success, - * NULL in case of failure. - */ -struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g, - struct vm_gk20a *vm); - -/** - * @brief Free graphics subcontext buffer. + * @brief Allocate and map graphics subcontext context header buffer. * * @param g [in] Pointer to GPU driver struct. * @param subctx [in] Pointer to graphics subcontext struct. * @param vm [in] Pointer to virtual memory. * + * This function allocates memory for subcontext header stored in + * #nvgpu_gr_subctx structure. + * + * Subcontext header memory will be mapped to given virtual + * memory. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_subctx_setup_header(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, + struct vm_gk20a *vm); + +/** + * @brief Allocate graphics subcontext buffer. + * + * @param g [in] Pointer to GPU driver struct. + * + * This function allocates memory for #nvgpu_gr_subctx structure. + * + * @return pointer to #nvgpu_gr_subctx struct in case of success, + * NULL in case of failure. + */ +struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g); + +/** + * @brief Free graphics subcontext buffer. + * + * @param g [in] Pointer to GPU driver struct. + * @param subctx [in] Pointer to TSG subcontext struct. + * @param vm [in] Pointer to virtual memory. + * @param unmap [in] Indicates if GR context buffers and subctx + * buffer are to be unmapped. + * true in case of native nvgpu config and + * false in case of vgpu config. For vgpu case, + * this path is used to handle the VM references + * per subcontext. + * * This function will free memory allocated for subcontext header and * #nvgpu_gr_subctx structure. */ void nvgpu_gr_subctx_free(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, - struct vm_gk20a *vm); + struct nvgpu_tsg_subctx *subctx, + struct vm_gk20a *vm, + bool unmap); /** * @brief Initialize graphics subcontext buffer header. @@ -101,6 +122,19 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, */ struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct nvgpu_gr_subctx *subctx); +/** + * @brief Get pointer of GR context buffers mappings struct for a subcontext. 
+ * + * @param subctx [in] Pointer to graphics subcontext struct. + * + * This function returns #nvgpu_gr_ctx_mappings pointer of GR context buffers + * mappings stored in #nvgpu_gr_subctx. + * + * @return pointer to subcontext GR context buffers mappings struct. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_gr_subctx_get_mappings( + struct nvgpu_gr_subctx *subctx); + #ifdef CONFIG_NVGPU_GRAPHICS void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); @@ -108,10 +142,14 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings); + +void nvgpu_gr_subctx_clear_preemption_buffer_va(struct gk20a *g, + struct nvgpu_gr_subctx *subctx); #endif #ifdef CONFIG_NVGPU_DEBUGGER void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va); + struct nvgpu_gr_subctx *subctx, + bool set_pm_ctx_gpu_va); #endif #endif /* NVGPU_GR_SUBCTX_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h index b3e21c7eb..c84db9129 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/zcull.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,8 +28,8 @@ struct gk20a; struct nvgpu_gr_config; struct nvgpu_gr_ctx; -struct nvgpu_gr_subctx; struct nvgpu_gr_zcull; +struct nvgpu_tsg_subctx; struct nvgpu_gr_zcull_info { u32 width_align_pixels; @@ -54,7 +54,7 @@ int nvgpu_gr_zcull_init_hw(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull, struct nvgpu_gr_config *gr_config); -int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, +int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_tsg_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); #endif /* NVGPU_GR_ZCULL_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 36f907cd7..9b6280df1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -78,8 +78,6 @@ struct nvgpu_tsg { /** Pointer to GPU driver struct. */ struct gk20a *g; - /** Points to TSG's virtual memory */ - struct vm_gk20a *vm; /** * Starting with Volta, when a Channel/TSG is set up, a recovery buffer * region must be allocated in BAR2, to allow engine to save methods if @@ -98,6 +96,12 @@ struct nvgpu_tsg { */ struct nvgpu_gr_ctx *gr_ctx; + /** + * List of gr_ctx buffers maps (#nvgpu_gr_ctx_mappings) for gr ctx + * for this TSG. Accessed by holding #ctx_init_lock from TSG. + */ + struct nvgpu_list_node gr_ctx_mappings_list; + /** * Mutex to prevent concurrent context initialization for channels * in same TSG. All channels in one TSG share the context buffer, @@ -113,6 +117,12 @@ struct nvgpu_tsg { */ struct nvgpu_ref refcount; + /** + * List of subcontexts (#nvgpu_tsg_subctx) bound to this TSG. + * Accessed by holding #ch_list_lock from TSG. + */ + struct nvgpu_list_node subctx_list; + /** List of channels bound to this TSG. 
*/ struct nvgpu_list_node ch_list; #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL @@ -128,7 +138,7 @@ struct nvgpu_tsg { #endif /** * Read write type of semaphore lock used for accessing/modifying - * #ch_list. + * #ch_list, #subctx_list and #ch_list in #nvgpu_tsg_subctx. */ struct nvgpu_rwsem ch_list_lock; @@ -272,8 +282,6 @@ struct nvgpu_tsg *nvgpu_tsg_open(struct gk20a *g, pid_t pid); * - Call non-NULL HAL to release tsg. This HAL is non-NULL for vgpu only. * - Call nvgpu_free_gr_ctx_struct to free #nvgpu_tsg.gr_ctx. * - Set #nvgpu_tsg.gr_ctx to NULL. - * - If #nvgpu_tsg.vm is non-NULL, do #nvgpu_vm_put for this vm and set - * it to NULL (Unhook TSG from VM). * - If #nvgpu_tsg.sm_error_states is non-NULL, free allocated memory and set * it to NULL. */ @@ -286,7 +294,7 @@ void nvgpu_tsg_release_common(struct gk20a *g, struct nvgpu_tsg *tsg); * * - Get pointer to the #nvgpu_tsg using #ref. * - Call HAL to free #nvgpu_tsg.gr_ctx if this memory pointer is non-NULL - * and valid and also #nvgpu_tsg.vm is non-NULL. + * and valid. * - Unhook all events created on the TSG being released. * -- Acquire #nvgpu_tsg.event_id_list_lock. * -- While #nvgpu_tsg.event_id_list is non-empty, @@ -363,6 +371,7 @@ void nvgpu_tsg_disable(struct nvgpu_tsg *tsg); * - If channel had ASYNC subctx id, then set runqueue selector to 1. * - Set runlist id of TSG to channel's runlist_id if runlist_id of TSG * is set to #NVGPU_INVALID_TSG_ID. + * - Bind channel to TSG subcontext calling #nvgpu_tsg_subctx_bind_channel. * - Call HAL to bind channel to TSG. * - Add channel to TSG's list of channels. See #nvgpu_tsg.ch_list * - Set #nvgpu_channel.tsgid to #nvgpu_tsg.tsgid. @@ -445,6 +454,7 @@ struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid); * - If NEXT bit is set and force is set to false, caller will * have to retry unbind. * - Remove channel from its runlist. + * - Remove channel from subctx by calling #nvgpu_tsg_subctx_unbind_channel. * - Remove channel from TSG's channel list. * - Set tsgid of the channel to #NVGPU_INVALID_TSG_ID. * - Disable channel so that it is not picked up by h/w scheduler. @@ -456,6 +466,7 @@ struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid); * - Call #nvgpu_channel_update_runlist to remove the channel from the runlist. * - Acquire #nvgpu_tsg.ch_list_lock of the tsg and delete channel from * #nvgpu_tsg.ch_list. + * - Remove channel from subctx by calling #nvgpu_tsg_subctx_unbind_channel. * - Remove channel from TSG's channel list. * - Set #nvgpu_channel.tsgid to #NVGPU_INVALID_TSG_ID * - Release #nvgpu_tsg.ch_list_lock of the tsg. diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h new file mode 100644 index 000000000..73cfd444e --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg_subctx.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef NVGPU_TSG_SUBCTX_H +#define NVGPU_TSG_SUBCTX_H + +#include + +struct gk20a; +struct nvgpu_tsg; +struct nvgpu_tsg_subctx; +struct nvgpu_channel; +/** + * @brief Bind a channel to the TSG subcontext. + * + * @param tsg [in] Pointer to TSG struct. + * @param ch [in] Pointer to Channel struct. + * + * - Loop through the #subctx_list in #tsg to check if the subctx + * exists for the provided channel. + * - If it exists, validate the channel VM with subctx VM. + * - If validated, add the channel to the subctx #ch_list and exit. + * - Else allocate and initialize new subctx structure. + * - Add the channel to the subctx #ch_list and add subctx to the + * TSG #subctx_list. + * + * @return 0 for successful bind or if subctx support is disabled, + * < 0 for failure. + * @retval -EINVAL if channel VM doesn't match with subctx VM for provided + * subctx_id. + * @retval -ENOMEM if subctx allocation fails. + */ +int nvgpu_tsg_subctx_bind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch); + +/** + * @brief Unbind a channel from the TSG subcontext. + * + * @param tsg [in] Pointer to TSG struct. + * @param ch [in] Pointer to Channel struct. + * + * - Validate that #subctx is allocated for the channel #ch. + * - Remove the channel from the subctx #ch_list. + * - If the subctx #ch_list is empty + * - Invoke g->ops.gr.setup.free_subctx to free the GR subcontext + * struct (and GR subcontext mappings struct). + * - Remove the subctx from the TSG #subctx_list. + * - Free the subctx memory. If this was the only active channel + * in the TSG this function will delete the objects in the + * sequence: mappings -> gr_subctx -> tsg_subctx + */ +void nvgpu_tsg_subctx_unbind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch); + +/** + * @brief Allocate GR subcontext for a TSG subcontext. + * + * @param g [in] Pointer to gk20a struct. + * @param ch [in] Pointer to Channel struct. + * + * - Check if TSG subctx is allocated for the channel. + * - If not allocated, return error. + * - If allocated, and if GR subcontext is not allocated call + * #nvgpu_gr_subctx_alloc. + * + * @return 0 for successful allocation, < 0 for failure. + */ +int nvgpu_tsg_subctx_alloc_gr_subctx(struct gk20a *g, struct nvgpu_channel *ch); + +/** + * @brief Allocate and map GR subcontext header for a TSG subcontext. + * + * @param g [in] Pointer to gk20a struct. + * @param ch [in] Pointer to Channel struct. + * + * - Check if TSG and GR subctx is allocated for the channel. + * - If not allocated, return error. + * - If allocated, setup subcontext header by calling + * #nvgpu_gr_subctx_setup_header. + * + * @return 0 for successful allocation, < 0 for failure. + */ +int nvgpu_tsg_subctx_setup_subctx_header(struct gk20a *g, + struct nvgpu_channel *ch); + +/** + * @brief Get GR subcontext for a TSG subcontext. + * + * @param tsg_subctx [in] Pointer to TSG Subcontext struct. + * + * - Return #gr_subctx from #nvgpu_tsg_subctx. 
+ */ +struct nvgpu_gr_subctx *nvgpu_tsg_subctx_get_gr_subctx( + struct nvgpu_tsg_subctx *tsg_subctx); + +/** + * @brief Get id of a TSG subcontext. + * + * @param tsg_subctx [in] Pointer to TSG Subcontext struct. + * + * - Return #subctx_id from #nvgpu_tsg_subctx. + */ +u32 nvgpu_tsg_subctx_get_id(struct nvgpu_tsg_subctx *tsg_subctx); + +/** + * @brief Allocate or get the mappings struct for the TSG subcontext. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param ch [in] Pointer to Channel struct. + * + * This function allocates the mappings struct for subcontext corresponding + * to given Channel's VM if not available already else returns the same. + * It adds the gr_subctx corresponding to the channel the mapping object's + * subctx_list. + * + * @return mappings struct in case of success, null in case of failure. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_tsg_subctx_alloc_or_get_mappings( + struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch); + +#ifdef CONFIG_NVGPU_GFXP +/** + * @brief Program preemption buffer virtual addresses for all subcontexts. + * + * @param tsg_subctx [in] Pointer to TSG subcontext struct. + * + * - Checks if VEID0 mappings are available. + * - If available, program the preemption buffer virtual addresses + * (VEID0 VA and VA in subcontext VM) for all GR subcontexts' + * headers. + */ +void nvgpu_tsg_subctxs_set_preemption_buffer_va( + struct nvgpu_tsg_subctx *tsg_subctx); + +/** + * @brief Clear preemption buffer virtual addresses for all subcontexts. + * + * @param tsg_subctx [in] Pointer to TSG subcontext struct. + * + * - Program the preemption buffer virtual addresses + * (VEID0 VA and VA in subcontext VM) for all GR subcontexts' + * headers to 0. + */ +void nvgpu_tsg_subctxs_clear_preemption_buffer_va( + struct nvgpu_tsg_subctx *tsg_subctx); +#endif /* CONFIG_NVGPU_GFXP */ + +#ifdef CONFIG_NVGPU_DEBUGGER +/** + * @brief Program PM buffer virtual addresses for all subcontexts. + * + * @param tsg [in] Pointer to TSG struct. + * @param set_pm_ctx_gpu_va [in] Indicates if PM ctx buffer GPU VA + * is to be programmed. + * + * - Program the PM buffer virtual address for all GR subcontexts' headers. 
+ */ +void nvgpu_tsg_subctxs_set_pm_buffer_va(struct nvgpu_tsg *tsg, + bool set_pm_ctx_gpu_va); +#endif /* CONFIG_NVGPU_DEBUGGER */ + +#endif /* NVGPU_TSG_SUBCTX_H */ diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export index 48e6653ae..d3e9512c4 100644 --- a/libs/igpu/libnvgpu-drv-igpu_safe.export +++ b/libs/igpu/libnvgpu-drv-igpu_safe.export @@ -709,6 +709,10 @@ nvgpu_tsg_store_sm_error_state nvgpu_tsg_get_sm_error_state nvgpu_tsg_abort nvgpu_tsg_bind_channel +nvgpu_tsg_subctx_bind_channel +nvgpu_tsg_subctx_unbind_channel +nvgpu_tsg_subctx_alloc_gr_subctx +nvgpu_tsg_subctx_setup_subctx_header nvgpu_tsg_check_and_get_from_id nvgpu_tsg_cleanup_sw nvgpu_tsg_default_timeslice_us diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.c b/userspace/units/fifo/tsg/nvgpu-tsg.c index 618f15e83..e61e683b9 100644 --- a/userspace/units/fifo/tsg/nvgpu-tsg.c +++ b/userspace/units/fifo/tsg/nvgpu-tsg.c @@ -654,7 +654,6 @@ int test_tsg_release(struct unit_module *m, struct nvgpu_fifo *f = &g->fifo; struct gpu_ops gops = g->ops; struct nvgpu_tsg *tsg = NULL; - struct vm_gk20a vm; u32 branches = 0U; int ret = UNIT_FAIL; u32 free_gr_ctx_mask = @@ -706,12 +705,6 @@ int test_tsg_release(struct unit_module *m, if (branches & F_TSG_RELEASE_MEM) { ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, tsg->gr_ctx); unit_assert(ret == UNIT_SUCCESS, goto done); - tsg->vm = &vm; - /* prevent nvgpu_vm_remove */ - nvgpu_ref_init(&vm.ref); - nvgpu_ref_get(&vm.ref); - } else { - tsg->vm = NULL; } if ((branches & free_gr_ctx_mask) == free_gr_ctx_mask) { @@ -755,7 +748,6 @@ int test_tsg_release(struct unit_module *m, unit_assert(!f->tsg[tsg->tsgid].in_use, goto done); unit_assert(tsg->gr_ctx == NULL, goto done); - unit_assert(tsg->vm == NULL, goto done); unit_assert(tsg->sm_error_states == NULL, goto done); } ret = UNIT_SUCCESS; diff --git a/userspace/units/gr/ctx/Makefile.tmk b/userspace/units/gr/ctx/Makefile.tmk index af39b0e43..06ee19f07 100644 --- a/userspace/units/gr/ctx/Makefile.tmk +++ b/userspace/units/gr/ctx/Makefile.tmk @@ -28,7 +28,8 @@ NVGPU_UNIT_NAME = nvgpu-gr-ctx NVGPU_UNIT_SRCS = nvgpu-gr-ctx.c NVGPU_UNIT_INTERFACE_DIRS := \ - $(NV_COMPONENT_DIR)/.. + $(NV_COMPONENT_DIR)/.. 
\ + $(NV_COMPONENT_DIR)/../../fifo include $(NV_COMPONENT_DIR)/../../Makefile.units.common.tmk diff --git a/userspace/units/gr/ctx/nvgpu-gr-ctx.c b/userspace/units/gr/ctx/nvgpu-gr-ctx.c index 05b20b8e5..44902fdd2 100644 --- a/userspace/units/gr/ctx/nvgpu-gr-ctx.c +++ b/userspace/units/gr/ctx/nvgpu-gr-ctx.c @@ -42,6 +42,8 @@ #include "../nvgpu-gr.h" #include "nvgpu-gr-ctx.h" +#include "../../fifo/nvgpu-fifo-common.h" + #define DUMMY_SIZE 0xF0U static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm, @@ -92,14 +94,24 @@ int test_gr_ctx_error_injection(struct unit_module *m, u64 low_hole = SZ_4K * 16UL; struct nvgpu_channel *channel = (struct nvgpu_channel *) malloc(sizeof(struct nvgpu_channel)); - struct nvgpu_tsg *tsg = (struct nvgpu_tsg *) - malloc(sizeof(struct nvgpu_tsg)); + struct nvgpu_tsg *tsg; u32 i; - if (channel == NULL || tsg == NULL) { + if (channel == NULL) { unit_return_fail(m, "failed to allocate channel/tsg"); } + err = test_fifo_init_support(m, g, NULL); + if (err != 0) { + unit_return_fail(m, "failed to init fifo support\n"); + return err; + } + + tsg = nvgpu_tsg_open(g, 0); + if (!tsg) { + unit_return_fail(m, "failed to allocate tsg"); + } + desc = nvgpu_gr_ctx_desc_alloc(g); if (!desc) { unit_return_fail(m, "failed to allocate memory"); @@ -147,7 +159,7 @@ int test_gr_ctx_error_injection(struct unit_module *m, tsg->gr_ctx = gr_ctx; - mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm); + mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, channel); if (mappings == NULL) { unit_return_fail(m, "failed to allocate gr_ctx mappings"); } @@ -179,7 +191,7 @@ int test_gr_ctx_error_injection(struct unit_module *m, /* Inject kmem alloc failures to trigger mapping failures */ for (i = 0; i < 2; i++) { nvgpu_posix_enable_fault_injection(kmem_fi, true, 2 * i); - err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL, global_desc, mappings, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -188,8 +200,8 @@ int test_gr_ctx_error_injection(struct unit_module *m, } /* global ctx_desc size is not set. 
*/ - err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc, - mappings, false); + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL, + global_desc, mappings, false); if (err == 0) { unit_return_fail(m, "unexpected success"); } @@ -211,8 +223,8 @@ int test_gr_ctx_error_injection(struct unit_module *m, /* Fail global ctx buffer mappings */ for (i = 0; i < 4; i++) { nvgpu_posix_enable_fault_injection(kmem_fi, true, 4 + (2 * i)); - err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc, - mappings, false); + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL, + global_desc, mappings, false); if (err == 0) { unit_return_fail(m, "unexpected success"); } @@ -221,8 +233,8 @@ int test_gr_ctx_error_injection(struct unit_module *m, /* Successful mapping */ - err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc, - mappings, false); + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, NULL, + global_desc, mappings, false); if (err != 0) { unit_return_fail(m, "failed to map global buffers"); } @@ -253,6 +265,12 @@ int test_gr_ctx_error_injection(struct unit_module *m, nvgpu_gr_ctx_desc_free(g, desc); nvgpu_vm_put(g->mm.bar1.vm); + err = test_fifo_remove_support(m, g, NULL); + if (err != 0) { + unit_return_fail(m, "failed to remove fifo support\n"); + return err; + } + return UNIT_SUCCESS; } diff --git a/userspace/units/gr/intr/nvgpu-gr-intr.c b/userspace/units/gr/intr/nvgpu-gr-intr.c index b928cfb51..4a96c2a93 100644 --- a/userspace/units/gr/intr/nvgpu-gr-intr.c +++ b/userspace/units/gr/intr/nvgpu-gr-intr.c @@ -37,7 +37,9 @@ #include #include #include +#include #include +#include #include @@ -264,12 +266,45 @@ static int gr_test_intr_cache_current_ctx(struct gk20a *g, return g->ops.gr.intr.stall_isr(g); } +static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm, + u64 vaddr, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + u32 pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture) +{ + return 1; +} + +static void nvgpu_gmmu_unmap_locked_stub(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + u32 pgsz_idx, + bool va_allocated, + enum gk20a_mem_rw_flag rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch) +{ + return; +} + static int gr_test_intr_allocate_ch_tsg(struct unit_module *m, struct gk20a *g) { u32 tsgid = getpid(); + struct nvgpu_gr_ctx_mappings *mappings = NULL; struct nvgpu_channel *ch = NULL; struct nvgpu_tsg *tsg = NULL; + struct vm_gk20a *vm = NULL; bool sema_init, notify_init; int err; @@ -295,12 +330,46 @@ static int gr_test_intr_allocate_ch_tsg(struct unit_module *m, goto ch_cleanup; } + /* Setup VM */ + vm = nvgpu_vm_init(g, SZ_4K, SZ_4K << 10, + nvgpu_safe_sub_u64(1ULL << 37, SZ_4K << 10), + (1ULL << 32), 0ULL, + false, false, false, "dummy"); + if (!vm) { + unit_err(m, "failed to allocate VM"); + goto ch_cleanup; + } + + ch->g = g; + ch->vm = vm; + err = nvgpu_tsg_bind_channel(tsg, ch); if (err != 0) { unit_err(m, "failed tsg channel bind\n"); goto ch_cleanup; } + g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked_stub; + g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked_stub; + + err = nvgpu_tsg_subctx_alloc_gr_subctx(g, ch); + if (err != 0) { + unit_err(m, "failed to alloc gr subctx"); + goto ch_cleanup; + } + + err = nvgpu_tsg_subctx_setup_subctx_header(g, ch); + if (err != 0) { + unit_err(m, "failed to setup subctx header"); + goto 
+        }
+
+        mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, ch);
+        if (mappings == NULL) {
+                unit_err(m, "failed to allocate gr_ctx mappings");
+                goto ch_cleanup;
+        }
+
         err = gr_test_intr_block_ptr_as_current_ctx(m, g, ch, tsg, tsgid);
         if (err != 0) {
                 unit_err(m, "isr failed with block_ptr as current_ctx\n");
diff --git a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
index e8a5e5a62..d155b2db6 100644
--- a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
+++ b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -119,7 +120,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
         struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
         struct nvgpu_gr_ctx *gr_ctx = NULL;
         struct nvgpu_gr_ctx_mappings *mappings = NULL;
-        struct nvgpu_gr_subctx *subctx = NULL;
+        struct nvgpu_tsg_subctx *subctx = NULL;
         struct nvgpu_mem inst_block;
         struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
         struct nvgpu_posix_fault_inj *kmem_fi =
@@ -132,6 +133,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
                         struct nvgpu_gr_config *config);
         struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
                         malloc(sizeof(struct nvgpu_tsg));
+        struct nvgpu_channel *channel = (struct nvgpu_channel *)
+                        malloc(sizeof(struct nvgpu_channel));
 
         /* Inject allocation failures and initialize obj_ctx, should fail */
         nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
@@ -196,16 +199,31 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
                 unit_return_fail(m, "failed to allocate global buffers");
         }
 
-        subctx = nvgpu_gr_subctx_alloc(g, vm);
-        if (!subctx) {
-                unit_return_fail(m, "failed to allocate subcontext");
+        channel->g = g;
+        channel->vm = vm;
+
+        err = nvgpu_tsg_subctx_bind_channel(tsg, channel);
+        if (err != 0) {
+                unit_return_fail(m, "tsg subctx bind failed");
         }
 
-        mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
-        if (mappings == NULL) {
-                unit_return_fail(m, "failed to allocate gr_ctx mappings");
+        err = nvgpu_tsg_subctx_alloc_gr_subctx(g, channel);
+        if (err != 0) {
+                unit_return_fail(m, "failed to allocate gr_subctx");
         }
 
+        err = nvgpu_tsg_subctx_setup_subctx_header(g, channel);
+        if (err != 0) {
+                unit_return_fail(m, "failed to setup subctx header");
+        }
+
+        mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, channel);
+        if (mappings == NULL) {
+                unit_return_fail(m, "failed to allocate or get mappings");
+        }
+
+        subctx = channel->subctx;
+
         /* Fail gr_ctx allocation */
         nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
         err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
@@ -396,7 +414,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
         }
 
         /* Cleanup */
-        nvgpu_gr_subctx_free(g, subctx, vm);
+        nvgpu_tsg_subctx_unbind_channel(tsg, channel);
         nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
         nvgpu_free_gr_ctx_struct(g, gr_ctx);
         nvgpu_gr_ctx_desc_free(g, desc);
diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c
index cee8cfdde..8e18a0235 100644
--- a/userspace/units/gr/setup/nvgpu-gr-setup.c
+++ b/userspace/units/gr/setup/nvgpu-gr-setup.c
@@ -209,12 +209,6 @@ static int gr_test_setup_allocate_ch_tsg(struct unit_module *m,
                 goto ch_cleanup;
         }
 
-        err = nvgpu_tsg_bind_channel(tsg, ch);
-        if (err != 0) {
-                unit_err(m, "failed tsg channel bind\n");
-                goto ch_cleanup;
-        }
-
         err = gk20a_as_alloc_share(g, 0U,
                                    NVGPU_AS_ALLOC_UNIFIED_VA,
                                    U64(SZ_4K) << U64(10),
@@ -230,6 +224,12 @@ static int gr_test_setup_allocate_ch_tsg(struct unit_module *m,
                 goto tsg_unbind;
         }
 
+        err = nvgpu_tsg_bind_channel(tsg, ch);
+        if (err != 0) {
+                unit_err(m, "failed tsg channel bind\n");
+                goto ch_cleanup;
+        }
+
         gr_setup_ch = ch;
         gr_setup_tsg = tsg;
 
@@ -574,7 +574,7 @@ static int gr_setup_alloc_no_tsg_subcontext(struct unit_module *m, struct gk20a
 
 static void gr_setup_fake_free_obj_ctx(struct unit_module *m, struct gk20a *g)
 {
-        struct nvgpu_gr_subctx *gr_subctx = gr_setup_ch->subctx;
+        struct nvgpu_tsg_subctx *gr_subctx = gr_setup_ch->subctx;
 
         /* pass NULL variable*/
         gr_setup_ch->subctx = NULL;