diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 3b412594e..240d2ede8 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -554,6 +554,11 @@ gr: sources: [ common/gr/obj_ctx.c, common/gr/obj_ctx_priv.h, include/nvgpu/gr/obj_ctx.h ] + ctx_mappings: + safe: yes + sources: [ common/gr/ctx_mappings.c, + common/gr/ctx_mappings_priv.h, + include/nvgpu/gr/ctx_mappings.h ] subctx: safe: yes sources: [ common/gr/subctx.c, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index a05c5076e..718f910f2 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -254,6 +254,7 @@ nvgpu-y += \ common/gr/gr_intr.o \ common/gr/global_ctx.o \ common/gr/ctx.o \ + common/gr/ctx_mappings.o \ common/gr/gr_falcon.o \ common/gr/subctx.o \ common/gr/zcull.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 7e783b36f..59f7c4939 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -129,6 +129,7 @@ srcs += common/device.c \ common/gr/global_ctx.c \ common/gr/subctx.c \ common/gr/ctx.c \ + common/gr/ctx_mappings.c \ common/gr/gr_falcon.c \ common/gr/gr_config.c \ common/gr/gr_setup.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index df68830f9..40d217613 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -978,7 +978,7 @@ void nvgpu_tsg_release(struct nvgpu_ref *ref) } if ((tsg->gr_ctx != NULL) && (tsg->vm != NULL)) { - g->ops.gr.setup.free_gr_ctx(g, tsg->vm, tsg->gr_ctx); + g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx); } #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index 9466afa66..a25793f42 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -33,11 +34,6 @@ #include #include "common/gr/ctx_priv.h" -static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm); - struct nvgpu_gr_ctx_desc * nvgpu_gr_ctx_desc_alloc(struct gk20a *g) { @@ -58,6 +54,13 @@ void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, gr_ctx_desc->size[index] = size; } +u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return gr_ctx_desc->size[index]; +} + struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g) { return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx)); @@ -68,390 +71,218 @@ void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) nvgpu_kfree(g, gr_ctx); } -int nvgpu_gr_ctx_alloc(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) +void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + if (nvgpu_mem_is_valid(&ctx->mem[i])) { + nvgpu_dma_free(g, &ctx->mem[i]); + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx) { int err = 0; + u32 i; - nvgpu_log_fn(g, " "); + nvgpu_log(g, gpu_dbg_gr, " "); - if (gr_ctx_desc->size[NVGPU_GR_CTX_CTX] == 0U) { + if (desc->size[NVGPU_GR_CTX_CTX] == 0U) { + 
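/*
 * Note on the new contract: nvgpu_gr_ctx_alloc_ctx_buffers() now only
 * allocates the sysmem backing stores for the mem[] array; GPU mapping
 * is deferred to the nvgpu_gr_ctx_mappings object introduced by this
 * patch. A hypothetical caller (sketch only — golden_image_size and
 * the error handling are assumptions; the function names are from this
 * patch) would pair the steps roughly as:
 *
 *   nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, golden_image_size);
 *   err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
 *   if (err == 0) {
 *           err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
 *                           global_ctx_buffer, mappings, vpr);
 *   }
 */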
nvgpu_err(g, "context buffer size not set"); return -EINVAL; } - err = nvgpu_dma_alloc(g, gr_ctx_desc->size[NVGPU_GR_CTX_CTX], - &gr_ctx->mem); - if (err != 0) { - return err; + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + +#ifdef CONFIG_NVGPU_GFXP + /** + * Skip allocating the gfxp preemption buffers if GFXP mode is + * not set in the gr ctx. + */ + if ((i >= NVGPU_GR_CTX_PREEMPT_CTXSW) && + (i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) && + (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) != + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP)) { + continue; + } +#endif + + if (desc->size[i] != 0U) { + nvgpu_assert(!nvgpu_mem_is_valid(&ctx->mem[i])); + + err = nvgpu_dma_alloc_sys(g, desc->size[i], + &ctx->mem[i]); + if (err != 0) { + nvgpu_err(g, "ctx buffer %u alloc failed", i); + nvgpu_gr_ctx_free_ctx_buffers(g, ctx); + return err; + } + } } - gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, - &gr_ctx->mem, - 0, /* not GPU-cacheable */ - gk20a_mem_flag_none, true, - gr_ctx->mem.aperture); - if (gr_ctx->mem.gpu_va == 0ULL) { - err = -ENOMEM; - goto err_free_mem; - } + ctx->ctx_id_valid = false; - gr_ctx->ctx_id_valid = false; - - return 0; - -err_free_mem: - nvgpu_dma_free(g, &gr_ctx->mem); + nvgpu_log(g, gpu_dbg_gr, "done"); return err; } -void nvgpu_gr_ctx_free(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) +void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) { - nvgpu_log_fn(g, " "); + u32 i; - if (gr_ctx != NULL) { - nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, - global_ctx_buffer, vm); + nvgpu_log(g, gpu_dbg_gr, " "); + + /** + * Map all ctx buffers as cacheable except GR CTX and + * PATCH CTX buffers. + */ + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + ctx->mapping_flags[i] = NVGPU_VM_MAP_CACHEABLE; + } + + ctx->mapping_flags[NVGPU_GR_CTX_CTX] = 0U; + ctx->mapping_flags[NVGPU_GR_CTX_PATCH_CTX] = 0U; + + nvgpu_log(g, gpu_dbg_gr, "done"); +} -#ifdef CONFIG_NVGPU_DEBUGGER - nvgpu_gr_ctx_free_pm_ctx(g, vm, gr_ctx); -#endif - nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); #ifdef CONFIG_NVGPU_GFXP - if (nvgpu_mem_is_valid(&gr_ctx->gfxp_rtvcb_ctxsw_buffer)) { - nvgpu_dma_unmap_free(vm, - &gr_ctx->gfxp_rtvcb_ctxsw_buffer); - } - nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); -#endif - - nvgpu_dma_unmap_free(vm, &gr_ctx->mem); - (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); - } -} - -int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) +static void nvgpu_gr_ctx_free_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx) { - struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; - int err = 0; - - nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "patch_ctx size = %u", - gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX]); - - err = nvgpu_dma_alloc_map_sys(vm, gr_ctx_desc->size[NVGPU_GR_CTX_PATCH_CTX], - &patch_ctx->mem); - if (err != 0) { - return err; - } - - return 0; -} - -void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx) -{ - struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; - - (void)g; - - if (nvgpu_mem_is_valid(&patch_ctx->mem)) { - nvgpu_dma_unmap_free(vm, &patch_ctx->mem); - patch_ctx->data_count = 0; - } -} - -static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g, 
- struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) -{ - u64 *g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - u32 *g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; u32 i; nvgpu_log_fn(g, " "); - for (i = 0U; i < NVGPU_GR_GLOBAL_CTX_VA_COUNT; i++) { - if (g_bfr_va[i] != 0ULL) { - nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer, - g_bfr_index[i], vm, g_bfr_va[i]); + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + if (nvgpu_mem_is_valid(&ctx->mem[i])) { + nvgpu_dma_free(g, &ctx->mem[i]); } } - (void) memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va)); - (void) memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index)); + nvgpu_log_fn(g, "done"); } -static int nvgpu_gr_ctx_map_ctx_circular_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) +int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx) { - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; + int err = 0; + u32 i; - (void)g; - (void)vpr; + nvgpu_log(g, gpu_dbg_gr, " "); - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = - NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_CIRCULAR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = - NVGPU_GR_GLOBAL_CTX_CIRCULAR; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -static int nvgpu_gr_ctx_map_ctx_attribute_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - (void)vpr; - - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, false); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, - vm, NVGPU_VM_MAP_CACHEABLE, false); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = - NVGPU_GR_GLOBAL_CTX_ATTRIBUTE; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - - -static int nvgpu_gr_ctx_map_ctx_pagepool_buffer(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - (void)vpr; - - g_bfr_va = 
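/*
 * The three near-identical VPR-aware mappers being deleted here
 * (circular, attribute, pagepool) are replaced by one parameterized
 * helper in ctx_mappings.c. Illustrative call, mirroring the new code
 * later in this patch (the VPR variant is used only when
 * CONFIG_NVGPU_VPR is set and that buffer is ready):
 *
 *   err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer(
 *                   global_ctx_buffer,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR,
 *                   NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR,
 *                   vpr, mappings);
 */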
&gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - -#ifdef CONFIG_NVGPU_VPR - if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR)) { - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = - NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR; - } else { -#endif - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_PAGEPOOL, - vm, NVGPU_VM_MAP_CACHEABLE, true); - g_bfr_index[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = - NVGPU_GR_GLOBAL_CTX_PAGEPOOL; -#ifdef CONFIG_NVGPU_VPR - } -#endif - if (gpu_va == 0ULL) { - goto clean_up; - } - g_bfr_va[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -static int nvgpu_gr_ctx_map_ctx_buffer(struct gk20a *g, - u32 buffer_type, u32 va_type, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm) -{ - u64 *g_bfr_va; - u32 *g_bfr_index; - u64 gpu_va = 0ULL; - - (void)g; - - g_bfr_va = &gr_ctx->global_ctx_buffer_va[0]; - g_bfr_index = &gr_ctx->global_ctx_buffer_index[0]; - - gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, - buffer_type, vm, 0, true); - if (gpu_va == 0ULL) { - goto clean_up; - } - - g_bfr_index[va_type] = buffer_type; - g_bfr_va[va_type] = gpu_va; - - return 0; - -clean_up: - return -ENOMEM; -} - -int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr) -{ - int err; - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - - /* - * MIG supports only compute class. - * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB - * if 2D/3D/I2M classes(graphics) are supported. + /** + * Skip allocating the gfxp preemption buffers if GFXP mode is + * not set in the gr ctx. */ - if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { - /* Circular Buffer */ - err = nvgpu_gr_ctx_map_ctx_circular_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx circular buffer"); - goto fail; - } + if (nvgpu_gr_ctx_get_graphics_preemption_mode(ctx) != + NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { + nvgpu_log(g, gpu_dbg_gr, "GFXP mode not set. 
Skip preemption " + "buffers allocation"); + return 0; + } - /* Attribute Buffer */ - err = nvgpu_gr_ctx_map_ctx_attribute_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx attribute buffer"); - goto fail; - } + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { - /* Page Pool */ - err = nvgpu_gr_ctx_map_ctx_pagepool_buffer(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "cannot map ctx pagepool buffer"); - goto fail; - } -#ifdef CONFIG_NVGPU_GRAPHICS - /* RTV circular buffer */ - if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) { - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, - NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA, - gr_ctx, global_ctx_buffer, vm); + if (desc->size[i] != 0U && !nvgpu_mem_is_valid(&ctx->mem[i])) { + err = nvgpu_dma_alloc_sys(g, desc->size[i], + &ctx->mem[i]); if (err != 0) { - nvgpu_err(g, - "cannot map ctx rtv circular buffer"); - goto fail; + nvgpu_err(g, "ctx preemption buffer %u alloc failed", i); + nvgpu_gr_ctx_free_ctx_preemption_buffers(g, ctx); + return err; } } -#endif } - /* Priv register Access Map */ - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, - NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA, - gr_ctx, global_ctx_buffer, vm); - if (err != 0) { - nvgpu_err(g, "cannot map ctx priv access buffer"); - goto fail; - } + nvgpu_log(g, gpu_dbg_gr, "done"); -#ifdef CONFIG_NVGPU_FECS_TRACE - /* FECS trace buffer */ - if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { - err = nvgpu_gr_ctx_map_ctx_buffer(g, - NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, - NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA, - gr_ctx, global_ctx_buffer, vm); - if (err != 0) { - nvgpu_err(g, "cannot map ctx fecs trace buffer"); - goto fail; - } - } -#endif - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); - return 0; - -fail: - nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, global_ctx_buffer, vm); return err; } +#endif -u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx, - u32 index) +void nvgpu_gr_ctx_free(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer) { - return gr_ctx->global_ctx_buffer_va[index]; + nvgpu_log(g, gpu_dbg_gr, " "); + + if ((gr_ctx != NULL) && (gr_ctx->mappings != NULL)) { + nvgpu_gr_ctx_unmap_buffers(g, + gr_ctx, global_ctx_buffer, gr_ctx->mappings); + + nvgpu_gr_ctx_free_mappings(g, gr_ctx); + + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + + nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx); + + (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); } -struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm) { - return &gr_ctx->patch_ctx.mem; + struct nvgpu_gr_ctx_mappings *mappings = NULL; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + + nvgpu_log(g, gpu_dbg_gr, " "); + + mappings = gr_ctx->mappings; + if (mappings != NULL) { + return mappings; + } + + mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm); + if (mappings == NULL) { + nvgpu_err(g, "failed to allocate gr_ctx mappings"); + return mappings; + } + + gr_ctx->mappings = mappings; + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +void nvgpu_gr_ctx_free_mappings(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + if 
(gr_ctx->mappings == NULL) { + return; + } + + nvgpu_gr_ctx_mappings_free(g, gr_ctx->mappings); + gr_ctx->mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg) +{ + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + + return gr_ctx->mappings; } void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, @@ -460,9 +291,17 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, gr_ctx->patch_ctx.data_count = data_count; } -struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx, + u32 index) { - return &gr_ctx->mem; + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return &gr_ctx->mem[index]; +} + +u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return gr_ctx->mapping_flags[index]; } #ifdef CONFIG_NVGPU_SM_DIVERSITY @@ -481,6 +320,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx) /* load saved fresh copy of gloden image into channel gr_ctx */ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde) { @@ -493,7 +333,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - mem = &gr_ctx->mem; + mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; nvgpu_gr_global_ctx_load_local_golden_image(g, local_golden_image, mem); @@ -513,7 +353,7 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem, g->allow_all); g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem, - nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA)); #endif @@ -535,7 +375,8 @@ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, - gr_ctx->patch_ctx.mem.gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PATCH_CTX)); #ifdef CONFIG_NVGPU_DEBUGGER /* PM ctxt switch is off by default */ @@ -561,10 +402,12 @@ void nvgpu_gr_ctx_patch_write_begin(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + if (update_patch_count) { /* reset patch count if ucode has already processed it */ gr_ctx->patch_ctx.data_count = - g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + g->ops.gr.ctxsw_prog.get_patch_count(g, mem); nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", gr_ctx->patch_ctx.data_count); } @@ -574,9 +417,11 @@ void nvgpu_gr_ctx_patch_write_end(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool update_patch_count) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + /* Write context count to context image if it is mapped */ if (update_patch_count) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); nvgpu_log(g, gpu_dbg_info, "write patch count %d", gr_ctx->patch_ctx.data_count); @@ -590,6 +435,7 @@ void nvgpu_gr_ctx_patch_write(struct gk20a *g, if (patch) { u32 patch_slot; u64 patch_slot_max; + struct nvgpu_mem *patch_ctx_mem; if (gr_ctx == NULL) { nvgpu_err(g, @@ -597,13 +443,15 @@ void 
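/*
 * Usage of the patch-write API is unchanged by this patch; only the
 * backing-memory lookup moved to the mem[] array. Typical bracketed
 * write (addr and data are placeholders):
 *
 *   nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
 *   nvgpu_gr_ctx_patch_write(g, gr_ctx, addr, data, true);
 *   nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
 */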
nvgpu_gr_ctx_patch_write(struct gk20a *g, return; } + patch_ctx_mem = &gr_ctx->mem[NVGPU_GR_CTX_PATCH_CTX]; + patch_slot = nvgpu_safe_mult_u32(gr_ctx->patch_ctx.data_count, PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); patch_slot_max = nvgpu_safe_sub_u64( PATCH_CTX_ENTRIES_FROM_SIZE( - gr_ctx->patch_ctx.mem.size), + patch_ctx_mem->size), PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); if (patch_slot > patch_slot_max) { @@ -612,10 +460,8 @@ void nvgpu_gr_ctx_patch_write(struct gk20a *g, return; } - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, - (u64)patch_slot, addr); - nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, - (u64)patch_slot + 1ULL, data); + nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot, addr); + nvgpu_mem_wr32(g, patch_ctx_mem, (u64)patch_slot + 1ULL, data); gr_ctx->patch_ctx.data_count = nvgpu_safe_add_u32( gr_ctx->patch_ctx.data_count, 1U); nvgpu_log(g, gpu_dbg_info, @@ -688,25 +534,23 @@ bool nvgpu_gr_ctx_check_valid_preemption_mode(struct gk20a *g, void nvgpu_gr_ctx_set_preemption_modes(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + #ifdef CONFIG_NVGPU_GFXP if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem); } #endif #ifdef CONFIG_NVGPU_CILP if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { - g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem); } #endif if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { - g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, - &gr_ctx->mem); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem); } - } void nvgpu_gr_ctx_set_tsgid(struct nvgpu_gr_ctx *gr_ctx, u32 tsgid) @@ -749,10 +593,12 @@ u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx) int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + nvgpu_log(g, gpu_dbg_gr, " "); - g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem); - g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0); + g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, mem); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, 0); return 0; } @@ -760,6 +606,8 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool set_zcull_ptr) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + nvgpu_log_fn(g, " "); if (gr_ctx->zcull_ctx.gpu_va == 0ULL && @@ -768,11 +616,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return -EINVAL; } - g->ops.gr.ctxsw_prog.set_zcull(g, &gr_ctx->mem, - gr_ctx->zcull_ctx.ctx_sw_mode); + g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode); if (set_zcull_ptr) { - g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); } @@ -782,14 +629,19 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { - g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &gr_ctx->mem, - gr_ctx->preempt_ctxsw_buffer.gpu_va); + struct nvgpu_mem *mem = 
&gr_ctx->mem[NVGPU_GR_CTX_CTX]; + u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, + preempt_ctxsw_gpu_va); if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, - &gr_ctx->mem, gr_ctx->preempt_ctxsw_buffer.gpu_va); + mem, preempt_ctxsw_gpu_va); } } @@ -797,154 +649,6 @@ bool nvgpu_gr_ctx_desc_force_preemption_gfxp(struct nvgpu_gr_ctx_desc *gr_ctx_de { return gr_ctx_desc->force_preemption_gfxp; } - -static int nvgpu_gr_ctx_alloc_ctxsw_buffer(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - int err; - - err = nvgpu_dma_alloc_sys(vm->mm->g, size, mem); - if (err != 0) { - return err; - } - - mem->gpu_va = nvgpu_gmmu_map_partial(vm, - mem, - mem->aligned_size, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_none, - false, - mem->aperture); - if (mem->gpu_va == 0ULL) { - nvgpu_dma_free(vm->mm->g, mem); - return -ENOMEM; - } - - return 0; -} - -static int nvgpu_gr_ctx_alloc_preemption_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) -{ - int err = 0; - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW], - &gr_ctx->preempt_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate preempt buffer"); - goto fail; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW], - &gr_ctx->spill_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate spill buffer"); - goto fail_free_preempt; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW], - &gr_ctx->betacb_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate beta buffer"); - goto fail_free_spill; - } - - if (gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW] != 0U) { - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_GFXP_RTVCB_CTXSW], - &gr_ctx->gfxp_rtvcb_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate gfxp rtvcb"); - goto fail_free_betacb; - } - } - return 0; - -fail_free_betacb: - nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); -fail_free_spill: - nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); -fail_free_preempt: - nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); -fail: - return err; -} - -int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) -{ - int err = 0; - - /* nothing to do if already initialized */ - if (nvgpu_mem_is_valid(&gr_ctx->preempt_ctxsw_buffer)) { - return 0; - } - - if (gr_ctx_desc->size[NVGPU_GR_CTX_PREEMPT_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_SPILL_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_BETACB_CTXSW] == 0U || - gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW] == 0U) { - return -EINVAL; - } - - err = nvgpu_gr_ctx_alloc_preemption_buffers(g, gr_ctx, - gr_ctx_desc, vm); - - if (err != 0) { - nvgpu_err(g, "cannot allocate preemption buffers"); - goto fail; - } - - err = nvgpu_gr_ctx_alloc_ctxsw_buffer(vm, - gr_ctx_desc->size[NVGPU_GR_CTX_PAGEPOOL_CTXSW], - &gr_ctx->pagepool_ctxsw_buffer); - if (err != 0) { - nvgpu_err(g, "cannot allocate page pool"); - goto fail; - } - - return 0; - -fail: - return err; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->preempt_ctxsw_buffer; -} - -struct 
nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->spill_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->betacb_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->pagepool_ctxsw_buffer; -} - -struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer( - struct nvgpu_gr_ctx *gr_ctx) -{ - return &gr_ctx->gfxp_rtvcb_ctxsw_buffer; -} #endif /* CONFIG_NVGPU_GFXP */ #ifdef CONFIG_NVGPU_CILP @@ -969,9 +673,10 @@ void nvgpu_gr_ctx_set_cilp_preempt_pending(struct nvgpu_gr_ctx *gr_ctx, void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; u32 tmp; - tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); + tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, mem); if (tmp == 0U) { gr_ctx->patch_ctx.data_count = 0; } @@ -979,63 +684,82 @@ void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g, void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, + struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, gr_ctx->patch_ctx.data_count); - g->ops.gr.ctxsw_prog.set_patch_addr(g, &gr_ctx->mem, - gr_ctx->patch_ctx.mem.gpu_va); + g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PATCH_CTX)); } -int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, +static int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm) + struct nvgpu_gr_ctx_desc *gr_ctx_desc) { - struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; int err; - if (pm_ctx->mem.gpu_va != 0ULL) { - return 0; - } - err = nvgpu_dma_alloc_sys(g, gr_ctx_desc->size[NVGPU_GR_CTX_PM_CTX], - &pm_ctx->mem); + &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]); if (err != 0) { nvgpu_err(g, "failed to allocate pm ctx buffer"); return err; } - pm_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, - &pm_ctx->mem, - NVGPU_VM_MAP_CACHEABLE, - gk20a_mem_flag_none, true, - pm_ctx->mem.aperture); - if (pm_ctx->mem.gpu_va == 0ULL) { - nvgpu_err(g, - "failed to map pm ctxt buffer"); - nvgpu_dma_free(g, &pm_ctx->mem); - return -ENOMEM; - } - return 0; } -void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx) +static void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; - - if (pm_ctx->mem.gpu_va != 0ULL) { - nvgpu_dma_unmap_free(vm, &pm_ctx->mem); + if (nvgpu_mem_is_valid(&gr_ctx->mem[NVGPU_GR_CTX_PM_CTX])) { + nvgpu_dma_free(g, &gr_ctx->mem[NVGPU_GR_CTX_PM_CTX]); } (void)g; } -struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx) +int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_hwpm_map *hwpm_map) { - return &gr_ctx->pm_ctx.mem; + struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx; + struct nvgpu_gr_ctx_mappings *mappings; + int ret; + + if (gr_ctx->pm_ctx.mapped) { + return 0; + } + + mappings = nvgpu_gr_ctx_get_mappings(tsg); + if (mappings == NULL) { + nvgpu_err(g, "gr_ctx mappings struct not allocated"); + return -ENOMEM; + } + + nvgpu_gr_ctx_set_size(gr_ctx_desc, + NVGPU_GR_CTX_PM_CTX, + 
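/*
 * The PM context is now sized from the hwpm map and allocated/mapped
 * lazily. Sketch of a debugger-path caller — this assumes the nvgpu_gr
 * instance exposes gr_ctx_desc and hwpm_map fields; only the function
 * itself is guaranteed by this patch:
 *
 *   err = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg,
 *                   gr->gr_ctx_desc, gr->hwpm_map);
 */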
nvgpu_gr_hwpm_map_get_size(hwpm_map)); + + ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, gr_ctx_desc); + if (ret != 0) { + nvgpu_err(g, + "failed to allocate pm ctxt buffer"); + return ret; + } + + ret = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx, + NVGPU_GR_CTX_PM_CTX, mappings); + if (ret != 0) { + nvgpu_err(g, "gr_ctx pm_ctx buffer map failed %d", ret); + nvgpu_gr_ctx_free_pm_ctx(g, gr_ctx); + return ret; + } + + return 0; } void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode) @@ -1050,9 +774,11 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx) u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + if (!gr_ctx->ctx_id_valid) { gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, - &gr_ctx->mem); + mem); gr_ctx->ctx_id_valid = true; } @@ -1089,25 +815,30 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool enable) { - if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + if (!nvgpu_mem_is_valid(mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } - g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable); + g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable); return 0; } -int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - u32 mode, bool *skip_update) +int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update) { + struct nvgpu_gr_ctx_mappings *mappings = gr_ctx->mappings; + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; int ret = 0; *skip_update = false; - if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + if (!nvgpu_mem_is_valid(mem)) { nvgpu_err(g, "no graphics context allocated"); return -EFAULT; } @@ -1127,7 +858,8 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return 0; } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); - pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PM_CTX); break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW: if (pm_ctx->pm_mode == @@ -1137,7 +869,7 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); - pm_ctx->gpu_va = 0; + *pm_ctx_gpu_va = 0; break; case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: if (pm_ctx->pm_mode == @@ -1147,7 +879,8 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, } pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); - pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + *pm_ctx_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PM_CTX); break; default: nvgpu_err(g, "invalid hwpm context switch mode"); @@ -1160,13 +893,21 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { - g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem, - gr_ctx->pm_ctx.pm_mode); + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode); } -void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) +void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, 
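/*
 * pm_ctx.gpu_va is gone; the VA is handed back by
 * nvgpu_gr_ctx_prepare_hwpm_mode() and threaded through to the setter.
 * Illustrative sequence (error handling elided):
 *
 *   u64 pm_va;
 *   bool skip;
 *   err = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &pm_va,
 *                   &skip);
 *   if ((err == 0) && !skip)
 *           nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_va);
 */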
struct nvgpu_gr_ctx *gr_ctx, + u64 pm_ctx_gpu_va) { - g->ops.gr.ctxsw_prog.set_pm_ptr(g, &gr_ctx->mem, - gr_ctx->pm_ctx.gpu_va); + struct nvgpu_mem *mem = &gr_ctx->mem[NVGPU_GR_CTX_CTX]; + + g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, pm_ctx_gpu_va); +} + +void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped) +{ + ctx->pm_ctx.mapped = mapped; } #endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings.c b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c new file mode 100644 index 000000000..541066a11 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "common/gr/ctx_mappings_priv.h" + +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm) +{ + struct nvgpu_gr_ctx_mappings *mappings = NULL; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (tsg == NULL || vm == NULL) { + return NULL; + } + + mappings = (struct nvgpu_gr_ctx_mappings *) + nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_ctx_mappings)); + if (mappings == NULL) { + nvgpu_err(g, "failed to alloc mappings"); + return NULL; + } + + nvgpu_vm_get(vm); + mappings->tsg = tsg; + mappings->vm = vm; + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return mappings; +} + +void nvgpu_gr_ctx_mappings_free(struct gk20a *g, + struct nvgpu_gr_ctx_mappings *mappings) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_vm_put(mappings->vm); + nvgpu_kfree(g, mappings); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, u32 index, + struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + struct nvgpu_mem *mem; + u32 mapping_flags; + u64 gpu_va; + + nvgpu_log(g, gpu_dbg_gr, " "); + + mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index); + mapping_flags = nvgpu_gr_ctx_get_ctx_mapping_flags(ctx, index); + + nvgpu_assert(mappings->ctx_buffer_va[index] == 0ULL); + + if (nvgpu_mem_is_valid(mem)) { + gpu_va = nvgpu_gmmu_map(vm, + mem, + mapping_flags, + gk20a_mem_flag_none, true, + mem->aperture); + if (gpu_va == 0ULL) { + nvgpu_err(g, "failed to map ctx buffer %u", index); + return -ENOMEM; + } + + mappings->ctx_buffer_va[index] = gpu_va; + + nvgpu_log(g, gpu_dbg_gr, "buffer[%u] mapped at address 0x%llx", index, gpu_va); + +#ifdef CONFIG_NVGPU_DEBUGGER + if (index == NVGPU_GR_CTX_PM_CTX) { + nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, true); + } +#endif + } else { + nvgpu_log(g, gpu_dbg_gr, "buffer not allocated"); + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return 0; +} + +static void nvgpu_gr_ctx_mappings_unmap_ctx_buffer(struct nvgpu_gr_ctx *ctx, + u32 index, struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + struct nvgpu_mem *mem; + + mem = nvgpu_gr_ctx_get_ctx_mem(ctx, index); + + if (nvgpu_mem_is_valid(mem) && + (mappings->ctx_buffer_va[index] != 0ULL)) { + nvgpu_gmmu_unmap_addr(vm, mem, mappings->ctx_buffer_va[index]); + mappings->ctx_buffer_va[index] = 0ULL; + +#ifdef CONFIG_NVGPU_DEBUGGER + if (index == NVGPU_GR_CTX_PM_CTX) { + nvgpu_gr_ctx_set_pm_ctx_mapped(ctx, false); + } +#endif + } +} + +static void nvgpu_gr_ctx_mappings_unmap_ctx_buffers(struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u32 i; + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); + } +} + +static int nvgpu_gr_ctx_mappings_map_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + int err = 0; + u32 i; + + for (i = 0; i < NVGPU_GR_CTX_COUNT; i++) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(ctx, mappings); + return err; + } + } + + return err; +} + +#ifdef CONFIG_NVGPU_GFXP +static void nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers( + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u32 i; + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= 
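/*
 * Note: this loop (like the alloc/free loops in ctx.c) relies on
 * NVGPU_GR_CTX_PREEMPT_CTXSW through NVGPU_GR_CTX_GFXP_RTVCB_CTXSW
 * forming a contiguous index range in the NVGPU_GR_CTX_* enum that
 * covers the preempt, spill, betacb, pagepool and GFXP RTV circular
 * buffers.
 */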
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + nvgpu_gr_ctx_mappings_unmap_ctx_buffer(ctx, i, mappings); + } +} + +int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, + struct nvgpu_gr_ctx_mappings *mappings) +{ + int err = 0; + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + for (i = NVGPU_GR_CTX_PREEMPT_CTXSW; + i <= NVGPU_GR_CTX_GFXP_RTVCB_CTXSW; i++) { + if (mappings->ctx_buffer_va[i] == 0ULL) { + err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, ctx, i, mappings); + if (err != 0) { + nvgpu_err(g, "gr_ctx buffer %u map failed %d", i, err); + nvgpu_gr_ctx_mappings_unmap_ctx_preemption_buffers(ctx, mappings); + return err; + } + } + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; +} +#endif + +static int nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + u32 va_type, u32 buffer_type, u32 buffer_vpr_type, + bool vpr, struct nvgpu_gr_ctx_mappings *mappings) +{ + struct vm_gk20a *vm = mappings->vm; + u64 *g_bfr_va; + u32 *g_bfr_index; + u64 gpu_va = 0ULL; + + (void)vpr; + (void)buffer_vpr_type; + + g_bfr_va = &mappings->global_ctx_buffer_va[0]; + g_bfr_index = &mappings->global_ctx_buffer_index[0]; + +#ifdef CONFIG_NVGPU_VPR + if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + buffer_vpr_type)) { + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + buffer_vpr_type, + vm, true); + g_bfr_index[va_type] = buffer_vpr_type; + } else { +#endif + gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer, + buffer_type, + vm, true); + g_bfr_index[va_type] = buffer_type; +#ifdef CONFIG_NVGPU_VPR + } +#endif + if (gpu_va == 0ULL) { + goto clean_up; + } + + g_bfr_va[va_type] = gpu_va; + + return 0; + +clean_up: + return -ENOMEM; +} + +static void nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers( + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings) +{ + u64 *g_bfr_va = &mappings->global_ctx_buffer_va[0]; + u32 *g_bfr_index = &mappings->global_ctx_buffer_index[0]; + struct vm_gk20a *vm = mappings->vm; + u32 i; + + for (i = 0U; i < NVGPU_GR_GLOBAL_CTX_VA_COUNT; i++) { + if (g_bfr_va[i] != 0ULL) { + nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer, + g_bfr_index[i], vm, g_bfr_va[i]); + } + } + + (void) memset(g_bfr_va, 0, sizeof(mappings->global_ctx_buffer_va)); + (void) memset(g_bfr_index, 0, sizeof(mappings->global_ctx_buffer_index)); +} + +static int nvgpu_gr_ctx_mappings_map_global_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings, bool vpr) +{ + int err; + + /* + * MIG supports only compute class. + * Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB + * if 2D/3D/I2M classes(graphics) are supported. 
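 * Under MIG, the block below is skipped entirely, so only the
 * PRIV_ACCESS_MAP mapping (and, when NVGPU_FECS_TRACE_VA is enabled,
 * the FECS trace buffer mapping) further down is created for the
 * context.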
+ */ + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { + /* Circular Buffer */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA, + NVGPU_GR_GLOBAL_CTX_CIRCULAR, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR, +#else + NVGPU_GR_GLOBAL_CTX_CIRCULAR, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx circular buffer"); + goto fail; + } + + /* Attribute Buffer */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA, + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR, +#else + NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx attribute buffer"); + goto fail; + } + + /* Page Pool */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA, + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, +#ifdef CONFIG_NVGPU_VPR + NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR, +#else + NVGPU_GR_GLOBAL_CTX_PAGEPOOL, +#endif + vpr, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx pagepool buffer"); + goto fail; + } +#ifdef CONFIG_NVGPU_GRAPHICS + /* + * RTV circular buffer. Note that this is non-VPR buffer always. + */ + if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) { + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, + NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER, + false, mappings); + if (err != 0) { + nvgpu_err(g, + "cannot map ctx rtv circular buffer"); + goto fail; + } + } +#endif + } + + /* Priv register Access Map. Note that this is non-VPR buffer always. */ + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, + NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, + false, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx priv access buffer"); + goto fail; + } + +#ifdef CONFIG_NVGPU_FECS_TRACE + /* FECS trace buffer. Note that this is non-VPR buffer always. 
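 * The VA recorded here is later written into the context image by
 * nvgpu_gr_fecs_trace_bind_channel() via
 * nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings,
 * NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA), as updated elsewhere in
 * this patch.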
*/ + if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffer( + global_ctx_buffer, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, + NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER, + false, mappings); + if (err != 0) { + nvgpu_err(g, "cannot map ctx fecs trace buffer"); + goto fail; + } + } +#endif + + return 0; + +fail: + nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers( + global_ctx_buffer, mappings); + return err; +} + +int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings, + bool vpr) +{ + int err; + + nvgpu_log(g, gpu_dbg_gr, " "); + + if (gr_ctx == NULL || global_ctx_buffer == NULL || + mappings == NULL) { + nvgpu_err(g, "mappings/gr_ctx/global_ctx_buffer struct null"); + return -EINVAL; + } + + err = nvgpu_gr_ctx_mappings_map_ctx_buffers(g, gr_ctx, mappings); + if (err != 0) { + nvgpu_err(g, "fail to map ctx buffers"); + return err; + } + + err = nvgpu_gr_ctx_mappings_map_global_ctx_buffers(g, + global_ctx_buffer, mappings, vpr); + if (err != 0) { + nvgpu_err(g, "fail to map global ctx buffer"); + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + return err; + } + + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; +} + +void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, + struct nvgpu_gr_ctx_mappings *mappings) +{ + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_gr_ctx_mappings_unmap_global_ctx_buffers(global_ctx_buffer, + mappings); + + nvgpu_gr_ctx_mappings_unmap_ctx_buffers(gr_ctx, mappings); + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + +u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_VA_COUNT); + return mappings->global_ctx_buffer_va[index]; +} + +u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index) +{ + nvgpu_assert(index < NVGPU_GR_CTX_COUNT); + return mappings->ctx_buffer_va[index]; +} diff --git a/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h new file mode 100644 index 000000000..34b3e6722 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctx_mappings_priv.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CTX_MAPPINGS_PRIV_H +#define NVGPU_GR_CTX_MAPPINGS_PRIV_H + +#include + +struct nvgpu_tsg; +struct vm_gk20a; + +struct nvgpu_gr_ctx_mappings { + + /** TSG whose gr ctx mappings are tracked in this object */ + struct nvgpu_tsg *tsg; + + /** GPU virtual address space to which gr ctx buffers are mapped */ + struct vm_gk20a *vm; + + /** + * Array to store GPU virtual addresses of all TSG context + * buffers. + */ + u64 ctx_buffer_va[NVGPU_GR_CTX_COUNT]; + + /** + * Array to store GPU virtual addresses of all global context + * buffers. + */ + u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; + + /** + * Array to store indexes of global context buffers + * corresponding to GPU virtual addresses above. + */ + u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; +}; +#endif /* NVGPU_GR_CTX_MAPPINGS_PRIV_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctx_priv.h b/drivers/gpu/nvgpu/common/gr/ctx_priv.h index 3ecdecc29..a47228bfd 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/ctx_priv.h @@ -31,11 +31,6 @@ struct nvgpu_mem; * Pointer to this structure is maintained in #nvgpu_gr_ctx structure. */ struct patch_desc { - /** - * Memory to hold patch context buffer. - */ - struct nvgpu_mem mem; - /** * Count of entries written into patch context buffer. */ @@ -51,9 +46,8 @@ struct zcull_ctx_desc { #ifdef CONFIG_NVGPU_DEBUGGER struct pm_ctx_desc { - struct nvgpu_mem mem; - u64 gpu_va; u32 pm_mode; + bool mapped; }; #endif @@ -100,17 +94,21 @@ struct nvgpu_gr_ctx { bool ctx_id_valid; /** - * Memory to hold graphics context buffer. + * Array to store all GR context buffers. */ - struct nvgpu_mem mem; + struct nvgpu_mem mem[NVGPU_GR_CTX_COUNT]; -#ifdef CONFIG_NVGPU_GFXP - struct nvgpu_mem preempt_ctxsw_buffer; - struct nvgpu_mem spill_ctxsw_buffer; - struct nvgpu_mem betacb_ctxsw_buffer; - struct nvgpu_mem pagepool_ctxsw_buffer; - struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer; -#endif + /** + * Cacheability flags for mapping the context buffers. + */ + u32 mapping_flags[NVGPU_GR_CTX_COUNT]; + + /** + * Pointer to structure that holds GPU mapping of context buffers. + * These mappings will exist for the lifetime of TSG when the + * subcontexts are not enabled. + */ + struct nvgpu_gr_ctx_mappings *mappings; /** * Patch context buffer descriptor struct. @@ -146,18 +144,6 @@ struct nvgpu_gr_ctx { bool boosted_ctx; #endif - /** - * Array to store GPU virtual addresses of all global context - * buffers. - */ - u64 global_ctx_buffer_va[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; - - /** - * Array to store indexes of global context buffers - * corresponding to GPU virtual addresses above. - */ - u32 global_ctx_buffer_index[NVGPU_GR_GLOBAL_CTX_VA_COUNT]; - /** * TSG identifier corresponding to the graphics context. 
*/ diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index 2ce3614e8..0d65e2953 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -607,7 +608,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g) */ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + pid_t pid, u32 vmid) { u64 addr = 0ULL; struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; @@ -636,7 +638,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, } if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) { - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER_VA); nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr); aperture_mask = 0; @@ -650,7 +652,7 @@ int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, return -ENOMEM; } - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr, GK20A_FECS_TRACE_NUM_RECORDS); diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c index cae66ed92..abb34946c 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -265,7 +265,6 @@ fail: return err; } - int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *desc) { @@ -315,9 +314,32 @@ clean_up: return err; } +void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc) +{ + u32 i; + + nvgpu_log(g, gpu_dbg_gr, " "); + + /** + * Map all ctx buffers as cacheable except PRIV_ACCESS_MAP, + * RTV_CIRCULAR_BUFFER and FECS_TRACE buffers. + */ + for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) { + desc[i].mapping_flags = NVGPU_VM_MAP_CACHEABLE; + } + + desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].mapping_flags = 0U; + desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].mapping_flags = 0U; +#ifdef CONFIG_NVGPU_FECS_TRACE + desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].mapping_flags = 0U; +#endif + + nvgpu_log(g, gpu_dbg_gr, "done"); +} + u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, - u32 index, - struct vm_gk20a *vm, u32 flags, bool priv) + u32 index, struct vm_gk20a *vm, bool priv) { u64 gpu_va; @@ -326,7 +348,7 @@ u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, } gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, - flags, gk20a_mem_flag_none, priv, + desc[index].mapping_flags, gk20a_mem_flag_none, priv, desc[index].mem.aperture); return gpu_va; } diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h index 06647c6f4..be9b275d3 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h +++ b/drivers/gpu/nvgpu/common/gr/global_ctx_priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -40,6 +40,11 @@ struct nvgpu_gr_global_ctx_buffer_desc { */ size_t size; + /** + * Cacheability flags for mapping this context buffer. + */ + u32 mapping_flags; + /** * Function pointer to free global context buffer. */ diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index a76655019..756dd7091 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -131,6 +131,9 @@ static int gr_alloc_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr *gr) return err; } + nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(g, + gr->global_ctx_buffer); + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done"); return 0; } diff --git a/drivers/gpu/nvgpu/common/gr/gr_setup.c b/drivers/gpu/nvgpu/common/gr/gr_setup.c index df790b4ff..8dd089626 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_setup.c +++ b/drivers/gpu/nvgpu/common/gr/gr_setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_NVGPU_GRAPHICS @@ -163,6 +164,7 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, struct nvgpu_tsg *tsg = NULL; int err = 0; struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); + struct nvgpu_gr_ctx_mappings *mappings = NULL; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "GR%u: allocate object context for channel %u", @@ -203,18 +205,27 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, gr_ctx = tsg->gr_ctx; - if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, c->vm); + if (mappings == NULL) { + nvgpu_err(g, "fail to allocate/get ctx mappings struct"); + nvgpu_mutex_release(&tsg->ctx_init_lock); + goto out; + } + + if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_CTX))) { tsg->vm = c->vm; nvgpu_vm_get(tsg->vm); err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image, gr->global_ctx_buffer, gr->gr_ctx_desc, gr->config, gr_ctx, c->subctx, - tsg->vm, &c->inst_block, class_num, flags, + mappings, &c->inst_block, class_num, flags, c->cde, c->vpr); if (err != 0) { nvgpu_err(g, "failed to allocate gr ctx buffer"); + nvgpu_gr_ctx_free_mappings(g, gr_ctx); nvgpu_mutex_release(&tsg->ctx_init_lock); nvgpu_vm_put(tsg->vm); tsg->vm = NULL; @@ -225,13 +236,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, } else { /* commit gr ctx buffer */ nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx, - c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + c->subctx, mappings); } #ifdef CONFIG_NVGPU_FECS_TRACE if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) { err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block, - c->subctx, gr_ctx, tsg->tgid, 0); + c->subctx, gr_ctx, mappings, tsg->tgid, 0); if (err != 0) { nvgpu_warn(g, "fail to bind channel for ctxsw trace"); @@ -275,22 +286,27 @@ out: } void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { + struct nvgpu_mem *mem; + nvgpu_log_fn(g, " "); - if ((gr_ctx != NULL) && - nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) { + if (gr_ctx != NULL) { + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); + if (!nvgpu_mem_is_valid(mem)) { + return; + } + #ifdef CONFIG_DEBUG_FS if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) && nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close( g->gr->gr_ctx_desc)) { - 
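/*
 * With the vm parameter dropped from free_gr_ctx, teardown reduces to
 * the call already shown in the tsg.c hunk of this patch:
 *
 *   if ((tsg->gr_ctx != NULL) && (tsg->vm != NULL)) {
 *           g->ops.gr.setup.free_gr_ctx(g, tsg->gr_ctx);
 *   }
 *
 * The VM reference taken by the mappings object is dropped inside
 * nvgpu_gr_ctx_free_mappings().
 */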
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, - nvgpu_gr_ctx_get_ctx_mem(gr_ctx)); + g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, mem); } #endif - nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm); + nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer); } } @@ -334,16 +350,14 @@ static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode, return true; } - - int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, u32 graphics_preempt_mode, u32 compute_preempt_mode, u32 gr_instance_id) { + struct nvgpu_gr_ctx_mappings *mappings; struct nvgpu_gr_ctx *gr_ctx; struct gk20a *g = ch->g; struct nvgpu_tsg *tsg; - struct vm_gk20a *vm; struct nvgpu_gr *gr; u32 class_num; int err = 0; @@ -365,7 +379,6 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, return -EINVAL; } - vm = tsg->vm; gr_ctx = tsg->gr_ctx; if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode, @@ -379,13 +392,35 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, graphics_preempt_mode, compute_preempt_mode); err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config, - gr->gr_ctx_desc, gr_ctx, vm, class_num, + gr->gr_ctx_desc, gr_ctx, class_num, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); return err; } + mappings = nvgpu_gr_ctx_get_mappings(tsg); + if (mappings == NULL) { + nvgpu_err(g, "failed to get gr_ctx mappings"); + return -EINVAL; + } + +#ifdef CONFIG_NVGPU_GFXP + err = nvgpu_gr_ctx_alloc_ctx_preemption_buffers(g, + gr->gr_ctx_desc, gr_ctx); + if (err != 0) { + nvgpu_err(g, "fail to allocate ctx preemption buffers"); + return err; + } + + err = nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(g, + gr_ctx, mappings); + if (err != 0) { + nvgpu_err(g, "fail to map ctx preemption buffers"); + return err; + } + #endif + g->ops.tsg.disable(tsg); err = nvgpu_preempt_channel(g, ch); @@ -395,7 +430,7 @@ int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch, } nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx, - ch->subctx); + ch->subctx, mappings); if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index b474c7799..273674952 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -29,6 +29,7 @@ #include #endif #include +#include #include #include #include @@ -48,19 +49,22 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, } void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings) { struct nvgpu_mem *ctxheader; + u64 gpu_va; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) { - nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); + nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings); ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx); nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, ctxheader->gpu_va); } else { + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX); nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va); } @@ -70,7 +74,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block, #if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) static int 
nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 flags) { int err; @@ -122,7 +126,7 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, } err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode, + gr_ctx_desc, gr_ctx, class_num, graphics_preempt_mode, compute_preempt_mode); if (err != 0) { nvgpu_err(g, "set_ctxsw_preemption_mode failed"); @@ -138,14 +142,13 @@ static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, + struct nvgpu_gr_ctx *gr_ctx, u32 graphics_preempt_mode) { int err = 0; (void)config; (void)gr_ctx_desc; - (void)vm; /* set preemption modes */ switch (graphics_preempt_mode) { @@ -166,6 +169,13 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, nvgpu_log_info(g, "gfxp context attrib_cb_size=%d", attrib_cb_size); + if ((nvgpu_gr_ctx_get_size(gr_ctx_desc, + NVGPU_GR_CTX_PREEMPT_CTXSW) == 0U) || + (spill_size == 0U) || (attrib_cb_size == 0U) || + (pagepool_size == 0U)) { + return -EINVAL; + } + nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_SPILL_CTXSW, spill_size); nvgpu_gr_ctx_set_size(gr_ctx_desc, @@ -179,13 +189,6 @@ static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size); } - err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx, - gr_ctx_desc, vm); - if (err != 0) { - nvgpu_err(g, "cannot allocate ctxsw buffers"); - return err; - } - nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx, graphics_preempt_mode); break; @@ -226,7 +229,7 @@ static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g, int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num, + struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 graphics_preempt_mode, u32 compute_preempt_mode) { int err = 0; @@ -243,7 +246,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode); + gr_ctx_desc, gr_ctx, graphics_preempt_mode); if (err != 0) { goto fail; @@ -259,7 +262,8 @@ fail: void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_config *config, - struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, + struct nvgpu_gr_ctx_mappings *mappings) { #ifdef CONFIG_NVGPU_GFXP u64 addr; @@ -269,6 +273,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, (void)config; (void)subctx; + (void)mappings; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); @@ -280,35 +285,35 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, } if (!nvgpu_mem_is_valid( - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) { + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PREEMPT_CTXSW))) { goto done; } if (subctx != NULL) { - nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, - gr_ctx); + nvgpu_gr_subctx_set_preemption_buffer_va(g, 
subctx, mappings); } else { - nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx); + nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx, mappings); } nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true); - addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va; - g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_BETACB_CTXSW); + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings, nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_max_tpc_count(config), addr, true); - mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx); - addr = mem->gpu_va; + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PAGEPOOL_CTXSW); + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PAGEPOOL_CTXSW); nvgpu_assert(mem->size <= U32_MAX); size = (u32)mem->size; g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size, true, false); - mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx); - addr = mem->gpu_va; + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_SPILL_CTXSW); + addr = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_SPILL_CTXSW); nvgpu_assert(mem->size <= U32_MAX); size = (u32)mem->size; @@ -321,7 +326,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g, } if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) { - g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true); + g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, mappings, true); } nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true); @@ -333,7 +338,10 @@ done: void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch) + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, + bool patch) { u64 addr; u32 size; @@ -351,7 +359,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, */ if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { /* global pagepool buffer */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VA); size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size( global_ctx_buffer, @@ -361,7 +369,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, patch, true); /* global bundle cb */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_CIRCULAR_VA); size = nvgpu_safe_cast_u64_to_u32( g->ops.gr.init.get_bundle_cb_default_size(g)); @@ -370,10 +378,10 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, patch); /* global attrib cb */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VA); - g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, + g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx, mappings, nvgpu_gr_config_get_tpc_count(config), nvgpu_gr_config_get_max_tpc_count(config), addr, patch); @@ -383,7 +391,7 @@ void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g, #ifdef CONFIG_NVGPU_GRAPHICS if (g->ops.gr.init.commit_rtv_cb != NULL) { /* RTV circular buffer */ - addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + addr = nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER_VA); g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch); @@ -546,7 +554,8 @@ clean_up: static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, 
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { int err = 0; struct netlist_av_list *sw_method_init = @@ -562,7 +571,7 @@ static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g, g->ops.gr.init.fe_go_idle_timeout(g, false); nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, false); + config, gr_ctx, mappings, false); if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) { /* override a few ctx state registers */ @@ -635,7 +644,7 @@ static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g, nvgpu_log(g, gpu_dbg_gr, " "); - gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); #ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION @@ -707,6 +716,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block) { int err = 0; @@ -731,7 +741,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, } err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer, - config, gr_ctx); + config, gr_ctx, mappings); if (err != 0) { goto clean_up; } @@ -778,27 +788,71 @@ clean_up: return err; } -static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g, +static void nvgpu_gr_obj_ctx_gr_ctx_set_size(struct gk20a *g, struct nvgpu_gr_obj_ctx_golden_image *golden_image, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx, - struct vm_gk20a *vm) + struct nvgpu_gr_ctx_desc *gr_ctx_desc) { u64 size; - int err = 0; - - nvgpu_log_fn(g, " "); size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX, nvgpu_safe_cast_u64_to_u32(size)); nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size); - err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm); +} + +static void nvgpu_gr_obj_ctx_patch_ctx_set_size(struct gk20a *g, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx_desc *gr_ctx_desc) +{ + u32 size; + + size = nvgpu_safe_mult_u32( + g->ops.gr.init.get_patch_slots(g, config), + PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY); + nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_PATCH_CTX, size); + + nvgpu_log(g, gpu_dbg_gr, "patch_ctx size = %u", size); +} + +static int nvgpu_gr_obj_ctx_alloc_buffers(struct gk20a *g, + struct nvgpu_gr_obj_ctx_golden_image *golden_image, + struct nvgpu_gr_ctx_desc *gr_ctx_desc, + struct nvgpu_gr_config *config, + struct nvgpu_gr_ctx *gr_ctx, + u32 class_num, u32 flags) +{ + int err; + + (void)class_num; + (void)flags; + + nvgpu_log(g, gpu_dbg_gr, " "); + + nvgpu_gr_obj_ctx_gr_ctx_set_size(g, golden_image, gr_ctx_desc); + + nvgpu_gr_obj_ctx_patch_ctx_set_size(g, config, gr_ctx_desc); + + nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + +#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) + err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, + gr_ctx_desc, gr_ctx, class_num, flags); if (err != 0) { + nvgpu_err(g, "fail to init preemption mode"); + return err; + } +#endif + + err = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, gr_ctx); + if (err != 0) { + nvgpu_err(g, "fail to allocate ctx buffers"); return err; } - return 0; + nvgpu_log(g, gpu_dbg_gr, "done"); + + return err; } int 
nvgpu_gr_obj_ctx_alloc(struct gk20a *g, @@ -808,69 +862,40 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, - struct vm_gk20a *vm, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_mem *inst_block, u32 class_num, u32 flags, bool cde, bool vpr) { int err = 0; - (void)class_num; - (void)flags; - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " "); - err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc, - gr_ctx, vm); + err = nvgpu_gr_obj_ctx_alloc_buffers(g, golden_image, gr_ctx_desc, + config, gr_ctx, class_num, flags); if (err != 0) { - nvgpu_err(g, "fail to allocate TSG gr ctx buffer"); + nvgpu_err(g, "failed to alloc ctx buffers"); goto out; } - /* allocate patch buffer */ - if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) { - nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0); + nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx); - nvgpu_gr_ctx_set_size(gr_ctx_desc, - NVGPU_GR_CTX_PATCH_CTX, - nvgpu_safe_mult_u32( - g->ops.gr.init.get_patch_slots(g, config), - PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)); - - err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm); - if (err != 0) { - nvgpu_err(g, "fail to allocate patch buffer"); - goto out; - } - } - -#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP) - err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config, - gr_ctx_desc, gr_ctx, vm, class_num, flags); + err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, + global_ctx_buffer, mappings, vpr); if (err != 0) { - nvgpu_err(g, "fail to init preemption mode"); - goto out; - } -#endif - - /* map global buffer to channel gpu_va and commit */ - err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, - global_ctx_buffer, vm, vpr); - if (err != 0) { - nvgpu_err(g, "fail to map global ctx buffer"); + nvgpu_err(g, "failed to map ctx buffers"); goto out; } nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer, - config, gr_ctx, true); + config, gr_ctx, mappings, true); /* commit gr ctx buffer */ - nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, - nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings); /* init golden image */ err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image, - global_ctx_buffer, config, gr_ctx, inst_block); + global_ctx_buffer, config, gr_ctx, mappings, inst_block); if (err != 0) { nvgpu_err(g, "fail to init golden ctx image"); goto out; @@ -890,11 +915,11 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g, #endif /* load golden image */ - nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, + nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx, mappings, golden_image->local_golden_image, cde); nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx, - subctx); + subctx, mappings); #ifndef CONFIG_NVGPU_NON_FUSA if (g->ops.gpu_class.is_valid_compute(class_num) && diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index f08bb6036..6cc6d5773 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -81,23 +82,27 @@ void nvgpu_gr_subctx_free(struct gk20a *g, void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va) + struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings) { struct nvgpu_mem *ctxheader = &subctx->ctx_header; + u64 
gpu_va; + + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX); #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP /* set priv access map */ g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader, - nvgpu_gr_ctx_get_global_ctx_va(gr_ctx, + nvgpu_gr_ctx_mappings_get_global_ctx_va(mappings, NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_VA)); #endif g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader, - nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PATCH_CTX)); #ifdef CONFIG_NVGPU_DEBUGGER g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader, - nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); + nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PM_CTX)); #endif #ifdef CONFIG_NVGPU_GRAPHICS @@ -129,24 +134,26 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx #ifdef CONFIG_NVGPU_GFXP void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx_mappings *mappings) { + u64 preempt_ctxsw_gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, + NVGPU_GR_CTX_PREEMPT_CTXSW); + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header, - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + preempt_ctxsw_gpu_va); if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) { g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, - &subctx->ctx_header, - nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va); + &subctx->ctx_header, preempt_ctxsw_gpu_va); } } #endif /* CONFIG_NVGPU_GFXP */ #ifdef CONFIG_NVGPU_DEBUGGER void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g, - struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va) { g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header, - nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va); + pm_ctx_gpu_va); } #endif diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c index 8b277bae1..2396676eb 100644 --- a/drivers/gpu/nvgpu/common/profiler/profiler.c +++ b/drivers/gpu/nvgpu/common/profiler/profiler.c @@ -506,8 +506,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g, return -EINVAL; } - pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx); - + pm_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_PM_CTX); if (pm_ctx_mem == NULL) { nvgpu_err(g, "No PM context"); return -EINVAL; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c index 9a5e2b555..a20bc5547 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.c @@ -1,7 +1,7 @@ /* * Virtualized GPU Graphics * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
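The subctx and profiler hunks above complete the pattern: the ctx header is populated from the per-TSG mappings rather than from nvgpu_mem handles embedded in gr_ctx, and nvgpu_gr_subctx_set_hwpm_ptr() now takes a plain GPU VA resolved by the caller. A minimal sketch, assuming valid subctx and mappings pointers; the example_* name is illustrative.

static void example_refresh_subctx_ptrs(struct gk20a *g,
		struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Main/patch/PM ctx GPU VAs are all read from the mappings. */
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings);

#ifdef CONFIG_NVGPU_DEBUGGER
	/* The PM ctx VA is resolved here, not inside the helper. */
	nvgpu_gr_subctx_set_hwpm_ptr(g, subctx,
			nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
				NVGPU_GR_CTX_PM_CTX));
#endif
}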
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,7 +41,7 @@ #include "common/vgpu/ivc/comm_vgpu.h" void vgpu_gr_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) + struct nvgpu_gr_ctx *gr_ctx) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; @@ -56,4 +56,4 @@ void vgpu_gr_free_gr_ctx(struct gk20a *g, WARN_ON(err || msg.ret); (void) memset(gr_ctx, 0, sizeof(*gr_ctx)); -} \ No newline at end of file +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h index 9f102d998..a30efe1c7 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/gr/ctx_vgpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +28,6 @@ struct nvgpu_gr_ctx; struct vm_gk20a; void vgpu_gr_free_gr_ctx(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); + struct nvgpu_gr_ctx *gr_ctx); #endif diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c index 81c3c6ff8..c760c5420 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c @@ -85,7 +85,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct nvgpu_channel *ch; struct nvgpu_gr_ctx *gr_ctx; bool skip_update = false; - int err; + u64 pm_ctx_gpu_va = 0ULL; int ret; struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id); @@ -94,15 +94,11 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, gr_ctx = tsg->gr_ctx; if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { - nvgpu_gr_ctx_set_size(gr->gr_ctx_desc, - NVGPU_GR_CTX_PM_CTX, - nvgpu_gr_hwpm_map_get_size(gr->hwpm_map)); - - ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, - gr->gr_ctx_desc, tsg->vm); + ret = nvgpu_gr_ctx_alloc_map_pm_ctx(g, tsg, + gr->gr_ctx_desc, gr->hwpm_map); if (ret != 0) { nvgpu_err(g, - "failed to allocate pm ctxt buffer"); + "failed to allocate and map pm ctxt buffer"); return ret; } @@ -112,7 +108,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } } - ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &skip_update); + ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, + &pm_ctx_gpu_va, &skip_update); if (ret != 0) { return ret; } @@ -134,11 +131,12 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) { - nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, gr_ctx); + nvgpu_gr_subctx_set_hwpm_ptr(g, ch->subctx, + pm_ctx_gpu_va); } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx); + nvgpu_gr_ctx_set_hwpm_ptr(g, gr_ctx, pm_ctx_gpu_va); } out: @@ -1523,7 +1521,8 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, if (!gr_ctx_ready) { gr_ctx_ready = true; } - current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_CTX); } else { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, ctx_ops[i].offset, @@ -1539,7 +1538,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, } if (!pm_ctx_ready) { /* Make sure ctx buffer was initialized */ - if 
(!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx))) { + if (!nvgpu_mem_is_valid( + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PM_CTX))) { nvgpu_err(g, "Invalid ctx buffer"); err = -EINVAL; @@ -1547,14 +1548,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, } pm_ctx_ready = true; } - current_mem = nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx); + current_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, + NVGPU_GR_CTX_PM_CTX); } for (j = 0; j < num_offsets; j++) { /* sanity check gr ctxt offsets, * don't write outside, worst case */ - if ((current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx)) && + if ((current_mem == nvgpu_gr_ctx_get_ctx_mem( + gr_ctx, NVGPU_GR_CTX_CTX)) && (offsets[j] >= nvgpu_gr_obj_ctx_get_golden_image_size( gr->golden_image))) { @@ -1581,7 +1584,9 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, offsets[j] + 4U, v); } - if (current_mem == nvgpu_gr_ctx_get_ctx_mem(gr_ctx) && + if (current_mem == + nvgpu_gr_ctx_get_ctx_mem( + gr_ctx, NVGPU_GR_CTX_CTX) && g->ops.gr.ctx_patch_smpc != NULL) { /* check to see if we need to add a special fix for some of the SMPC perf regs */ @@ -1617,7 +1622,7 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg, nvgpu_kfree(g, offsets); } - if (nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->cpu_va != NULL) { + if (nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PATCH_CTX)->cpu_va != NULL) { nvgpu_gr_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); } diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c index fcbc9f61e..3c71ad8c9 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gm20b.c @@ -394,7 +394,7 @@ int gr_gm20b_update_pc_sampling(struct nvgpu_channel *c, } gr_ctx = tsg->gr_ctx; - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); if (!nvgpu_mem_is_valid(mem) || c->vpr) { return -EINVAL; } diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c index d0064ae64..2c1f40a5f 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gp10b.c @@ -767,7 +767,7 @@ int gr_gp10b_set_boosted_ctx(struct nvgpu_channel *ch, gr_ctx = tsg->gr_ctx; nvgpu_gr_ctx_set_boosted_ctx(gr_ctx, boost); - mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx); + mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX); err = nvgpu_channel_disable_tsg(g, ch); if (err != 0) { diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index fc69b5a6b..5fd2d4586 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. 
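With the dedicated getters removed, validity checks in the HAL code above all funnel through the indexed accessor. A short sketch of the pattern; the helper itself is hypothetical.

static bool example_ctx_buffers_ready(struct nvgpu_gr_ctx *gr_ctx)
{
	/* NVGPU_GR_CTX_CTX is the main context image; NVGPU_GR_CTX_PM_CTX
	 * is the perfmon buffer that previously had its own getter. */
	return nvgpu_mem_is_valid(
			nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX)) &&
		nvgpu_mem_is_valid(
			nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_PM_CTX));
}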
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -34,6 +34,7 @@ struct gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_config; struct netlist_av_list; struct nvgpu_gr_config; @@ -71,8 +72,8 @@ u32 gm20b_gr_init_get_global_ctx_cb_buffer_size(struct gk20a *g); u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g); void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch); u32 gm20b_gr_init_get_patch_slots(struct gk20a *g, struct nvgpu_gr_config *config); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c index 677a01a55..3f8b3a74e 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b_fusa.c @@ -402,13 +402,14 @@ u32 gm20b_gr_init_get_global_ctx_pagepool_buffer_size(struct gk20a *g) } void gm20b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch) { u32 cb_addr; (void)tpc_count; (void)max_tpc; + (void)mappings; addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 6c4357078..fa0941654 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -34,6 +34,7 @@ struct gk20a; struct nvgpu_gr_config; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct netlist_av_list; struct nvgpu_gr_obj_ctx_gfx_regs; @@ -69,8 +70,8 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, u32 max_tpc); void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch); void gv11b_gr_init_fe_go_idle_timeout(struct gk20a *g, bool enable); #ifdef CONFIG_NVGPU_SM_DIVERSITY diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c index 40a091bfc..766524545 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b_fusa.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -711,21 +712,21 @@ u32 gv11b_gr_init_get_global_attr_cb_size(struct gk20a *g, u32 tpc_count, } void gv11b_gr_init_commit_global_attrib_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, u32 tpc_count, u32 max_tpc, u64 addr, - bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + u32 tpc_count, u32 max_tpc, u64 addr, bool patch) { u32 attrBufferSize; u32 cb_addr; - gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, tpc_count, max_tpc, - addr, patch); + gm20b_gr_init_commit_global_attrib_cb(g, gr_ctx, mappings, tpc_count, + max_tpc, addr, patch); addr = addr >> gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v(); #ifdef CONFIG_NVGPU_GFXP - if (nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va != 0ULL) { 
+ if (nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_PREEMPT_CTXSW) != 0ULL) { attrBufferSize = nvgpu_safe_cast_u64_to_u32( - nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->size); + nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_BETACB_CTXSW)->size); } else { #endif attrBufferSize = g->ops.gr.init.get_global_attr_cb_size(g, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c index fbd4c4b66..053cd947c 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gr_init_gm20b.h" #include "gr_init_tu104.h" @@ -194,12 +195,13 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr, #ifdef CONFIG_NVGPU_GFXP void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, bool patch) + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + bool patch) { u64 addr; + u64 gpu_va; u32 rtv_cb_size; u32 gfxp_addr_size; - struct nvgpu_mem *buf_mem; nvgpu_log_fn(g, " "); @@ -211,9 +213,8 @@ void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f(); /* GFXP RTV circular buffer */ - buf_mem = nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(gr_ctx); - addr = buf_mem->gpu_va >> - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f(); + gpu_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW); + addr = gpu_va >> gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f(); nvgpu_assert(u64_hi32(addr) == 0U); tu104_gr_init_patch_rtv_cb(g, gr_ctx, (u32)addr, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h index 9a8ce5802..c6ec0eb1e 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_tu104.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +28,7 @@ struct gk20a; struct nvgpu_gr_ctx; struct netlist_av64_list; +struct nvgpu_gr_ctx_mappings; u32 tu104_gr_init_get_bundle_cb_default_size(struct gk20a *g); u32 tu104_gr_init_get_min_gpm_fifo_depth(struct gk20a *g); @@ -44,7 +45,8 @@ void tu104_gr_init_commit_rtv_cb(struct gk20a *g, u64 addr, struct nvgpu_gr_ctx *gr_ctx, bool patch); void tu104_gr_init_commit_gfxp_rtv_cb(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, bool patch); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + bool patch); u32 tu104_gr_init_get_attrib_cb_gfxp_default_size(struct gk20a *g); u32 tu104_gr_init_get_attrib_cb_gfxp_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 687ab9346..55d540a98 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -34,6 +34,7 @@ struct vm_gk20a; struct nvgpu_mem; struct nvgpu_channel; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_config; struct nvgpu_gr_isr_data; struct nvgpu_gr_intr_info; @@ -188,7 +189,6 @@ struct gops_gr_setup { * @brief Free GR engine context image. * * @param g [in] Pointer to GPU driver struct. - * @param vm [in] Pointer to virtual memory. * @param gr_ctx [in] Pointer to GR engine context image. 
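The gm20b/gv11b/tu104 hunks above show the rule for chip HALs: buffer GPU VAs are resolved from the mappings struct on demand instead of being read out of nvgpu_mem::gpu_va. A condensed sketch of a GFXP caller; the wrapper is illustrative and assumes CONFIG_NVGPU_GFXP is enabled.

static void example_commit_gfxp_rtv(struct gk20a *g,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Optional hook; the tu104 implementation shifts and aligns the
	 * VA it resolves from NVGPU_GR_CTX_GFXP_RTVCB_CTXSW before
	 * patching it in. */
	if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
		g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, mappings,
				true);
	}
}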
* * This function will free memory allocated for patch @@ -199,7 +199,6 @@ struct gops_gr_setup { * @see nvgpu_gr_setup_free_gr_ctx */ void (*free_gr_ctx)(struct gk20a *g, - struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); /** @@ -849,6 +848,7 @@ struct gops_gr_init { bool patch, bool global_ctx); void (*commit_global_attrib_cb)(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, + struct nvgpu_gr_ctx_mappings *mappings, u32 tpc_count, u32 max_tpc, u64 addr, bool patch); void (*commit_global_cb_manager)(struct gk20a *g, @@ -899,6 +899,7 @@ struct gops_gr_init { bool patch); void (*commit_gfxp_rtv_cb)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, bool patch); u32 (*get_attrib_cb_gfxp_default_size)(struct gk20a *g); u32 (*get_attrib_cb_gfxp_size)(struct gk20a *g); @@ -1109,6 +1110,7 @@ struct gops_gr_fecs_trace { struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, pid_t pid, u32 vmid); int (*unbind_channel)(struct gk20a *g, struct nvgpu_mem *inst_block); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 1b9d5e8d7..7a703db57 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -26,6 +26,7 @@ #include #include #include +#include #define NVGPU_INVALID_SM_CONFIG_ID (U32_MAX) @@ -39,7 +40,9 @@ */ struct gk20a; struct vm_gk20a; +struct nvgpu_tsg; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_global_ctx_buffer_desc; struct nvgpu_gr_global_ctx_local_golden_image; struct patch_desc; @@ -156,25 +159,19 @@ void nvgpu_gr_ctx_set_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, u32 index, u32 size); /** - * @brief Allocate graphics context buffer. + * @brief Get size of GR context buffer with given index. * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param gr_ctx_desc [in] Pointer to context descriptor struct. - * @param vm [in] Pointer to virtual memory. + * @param desc [in] Pointer to context descriptor struct. + * @param index [in] Index of GR context buffer. * - * This function allocates memory for graphics context buffer and also - * maps it to given virtual memory. + * @return size of the buffer. * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if context memory allocation fails. - * @retval -EINVAL if context buffer size is not set in - * #nvgpu_gr_ctx_desc struct. + * This function returns the size of GR context buffer with given buffer + * index. \a index must be less than NVGPU_GR_CTX_COUNT otherwise + * an assert is raised. */ -int nvgpu_gr_ctx_alloc(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm); +u32 nvgpu_gr_ctx_get_size(struct nvgpu_gr_ctx_desc *gr_ctx_desc, + u32 index); /** * @brief Free graphics context buffer. @@ -182,93 +179,14 @@ int nvgpu_gr_ctx_alloc(struct gk20a *g, * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to graphics context struct. * @param global_ctx_buffer [in]Pointer to global context descriptor struct. - * @param vm [in] Pointer to virtual memory. * * This function will free memory allocated for graphics context buffer, * patch context buffer, and all the ctxsw buffers. 
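The new getter pairs with nvgpu_gr_ctx_set_size() so callers such as nvgpu_gr_obj_ctx_set_graphics_preemption_mode() can validate sizes before any buffer exists. A minimal sketch of that pairing; the wrapper and the zero-check policy are illustrative.

static int example_size_preempt_buffer(struct nvgpu_gr_ctx_desc *desc,
		u32 preempt_size)
{
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PREEMPT_CTXSW,
			preempt_size);

	/* Asserts internally if the index is out of range. */
	if (nvgpu_gr_ctx_get_size(desc, NVGPU_GR_CTX_PREEMPT_CTXSW) == 0U) {
		return -EINVAL;
	}

	return 0;
}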
*/ void nvgpu_gr_ctx_free(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm); + struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer); -/** - * @brief Allocate patch context buffer. - * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param gr_ctx_desc [in] Pointer to context descriptor struct. - * @param vm [in] Pointer to virtual memory. - * - * This function allocates memory for patch context buffer and also - * maps it to given virtual memory. - * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if context memory allocation fails. - */ -int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_ctx_desc *gr_ctx_desc, - struct vm_gk20a *vm); - -/** - * @brief Free patch context buffer. - * - * @param g [in] Pointer to GPU driver struct. - * @param vm [in] Pointer to virtual memory. - * @param gr_ctx [in] Pointer to graphics context struct. - * - * This function will free memory allocated for patch context buffer. - */ -void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm, - struct nvgpu_gr_ctx *gr_ctx); - -/** - * @brief Map global context buffers. - * - * @param g [in] Pointer to GPU driver struct. - * @param gr_ctx [in] Pointer to graphics context struct. - * @param global_ctx_buffer [in]Pointer to global context descriptor struct. - * @param vm [in] Pointer to virtual memory. - * @param vpr [in] Boolean flag to use buffers in VPR. - * - * This function maps all global context buffers into given - * virtual memory and stores each virtual address into given - * #nvgpu_gr_ctx struct. - * - * @return 0 in case of success, < 0 in case of failure. - * @retval -ENOMEM if memory mapping fails for any context buffer. - */ -int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g, - struct nvgpu_gr_ctx *gr_ctx, - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer, - struct vm_gk20a *vm, bool vpr); - -/** - * @brief Get global context buffer virtual address. - * - * @param gr_ctx [in] Pointer to graphics context struct. - * @param index [in] Index of global context buffer. - * - * This function returns virtual address of global context buffer - * with given index stored in #nvgpu_gr_ctx struct. - * - * @return virtual address of global context buffer. - */ -u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx, - u32 index); - -/** - * @brief Get pointer of patch context buffer memory struct. - * - * @param gr_ctx [in] Pointer to graphics context struct. - * - * This function returns #nvgpu_mem pointer of patch context buffer stored - * in #nvgpu_gr_ctx struct. - * - * @return pointer to patch context buffer memory struct. - */ -struct nvgpu_mem *nvgpu_gr_ctx_get_patch_ctx_mem(struct nvgpu_gr_ctx *gr_ctx); /** * @brief Set data count in patch context buffer. @@ -283,15 +201,28 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx, u32 data_count); /** - * @brief Get sm diversity config of the given graphics context. + * @brief Get context buffer mem struct of the given graphics context. * * @param gr_ctx [in] Pointer to graphics context struct. + * @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) * - * This function returns #sm_diversity_config of graphics context struct. + * This function returns #mem of graphics context struct. * - * @return sm diversity config of the given graphics context. 
+ * @return context buffer mem of the given graphics context. */ -struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx); +struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx, u32 index); + +/** + * @brief Get mapping flags of a context buffer of the given graphics context. + * + * @param gr_ctx [in] Pointer to graphics context struct. + * @param index [in] Value from (NVGPU_GR_CTX_CTX, NVGPU_GR_CTX_GFXP_RTVCB_CTXSW) + * + * This function returns #mapping_flags of graphics context struct. + * + * @return context buffer mapping flags of the given graphics context. + */ +u32 nvgpu_gr_ctx_get_ctx_mapping_flags(struct nvgpu_gr_ctx *gr_ctx, u32 index); #ifdef CONFIG_NVGPU_SM_DIVERSITY /** @@ -320,6 +251,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx); * * @param g [in] Pointer to GPU driver struct. * @param gr_ctx [in] Pointer to graphics context struct. + * @param mappings [in] Pointer to mappings of GR context buffers. * @param local_golden_image [in] Pointer to local golden image struct. * @param cde [in] Boolean flag to enable/disable CDE. * @@ -332,6 +264,7 @@ u32 nvgpu_gr_ctx_get_sm_diversity_config(struct nvgpu_gr_ctx *gr_ctx); */ void nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + struct nvgpu_gr_ctx_mappings *mappings, struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image, bool cde); @@ -481,6 +414,99 @@ struct nvgpu_gr_ctx *nvgpu_alloc_gr_ctx_struct(struct gk20a *g); */ void nvgpu_free_gr_ctx_struct(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); +/** + * @brief Free TSG specific GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function frees all TSG specific GR context buffers. + */ +void nvgpu_gr_ctx_free_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx *ctx); + +/** + * @brief Allocate TSG specific GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to context descriptor struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function allocates all TSG specific GR context buffers. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_alloc_ctx_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx); + +#ifdef CONFIG_NVGPU_GFXP +/** + * @brief Allocate TSG specific GR preemption context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to context descriptor struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function allocates all TSG specific GR preemption context buffers. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_alloc_ctx_preemption_buffers(struct gk20a *g, + struct nvgpu_gr_ctx_desc *desc, + struct nvgpu_gr_ctx *ctx); +#endif + +/** + * @brief Initialize mapping flags for GR context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to graphics context struct. + * + * This function initializes cacheability attribute for TSG specific + * GR context buffers. + */ +void nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_ctx *ctx); + +/** + * @brief Allocate or get GR ctx buffers mappings for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. 
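Taken together, the helpers declared here replace the old allocate-and-map-in-one-call model: buffers are allocated unmapped, mapping flags are initialized, and mappings are created per TSG/vm pair and filled on demand. A condensed sketch of the sequence, with locking and most error handling elided; the wrapper name is illustrative.

static int example_prepare_and_map_patch_ctx(struct gk20a *g,
		struct nvgpu_tsg *tsg, struct vm_gk20a *vm,
		struct nvgpu_gr_ctx_desc *desc, u64 *patch_va)
{
	struct nvgpu_gr_ctx *gr_ctx = tsg->gr_ctx;
	struct nvgpu_gr_ctx_mappings *mappings;
	int err;

	/* One mappings struct per TSG/vm pair; repeated calls return
	 * the struct created on the first call. */
	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
	if (mappings == NULL) {
		return -ENOMEM;
	}

	/* Plain sysmem allocations; nothing is mapped yet. */
	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err != 0) {
		nvgpu_gr_ctx_free_mappings(g, gr_ctx);
		return err;
	}

	/* Cacheable for all buffers except GR ctx and patch ctx. */
	nvgpu_gr_ctx_init_ctx_buffers_mapping_flags(g, gr_ctx);

	/* Map one buffer and read its VA back from the mappings. */
	err = nvgpu_gr_ctx_mappings_map_ctx_buffer(g, gr_ctx,
			NVGPU_GR_CTX_PATCH_CTX, mappings);
	if (err == 0) {
		*patch_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings,
				NVGPU_GR_CTX_PATCH_CTX);
	}

	return err;
}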
+ *
+ * This function allocates the mappings struct for the TSG corresponding to
+ * the given vm if one does not already exist, else it returns the existing
+ * struct.
+ *
+ * @return mappings struct in case of success, null in case of failure.
+ */
+struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_alloc_or_get_mappings(struct gk20a *g,
+	struct nvgpu_tsg *tsg, struct vm_gk20a *vm);
+
+/**
+ * @brief Get GR ctx buffers mappings for a TSG.
+ *
+ * @param tsg [in]	Pointer to TSG struct.
+ *
+ * This function returns the mappings struct for the TSG.
+ *
+ * @return mappings struct.
+ */
+struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_get_mappings(struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Free the gr ctx mappings struct.
+ *
+ * @param g [in]	Pointer to GPU driver struct.
+ * @param gr_ctx [in]	Pointer to graphics context struct.
+ *
+ * This function deletes the gr ctx mappings struct. This is to be
+ * called when freeing the gr context or in error cases.
+ */
+void nvgpu_gr_ctx_free_mappings(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx);
+
 /**
  * @brief Set TSG id in graphics context structure.
  *
@@ -515,28 +541,9 @@ bool nvgpu_gr_ctx_desc_force_preemption_cilp(
 #endif /* CONFIG_NVGPU_CILP */
 
 #ifdef CONFIG_NVGPU_GFXP
-int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx,
-	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct vm_gk20a *vm);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_spill_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_betacb_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
-struct nvgpu_mem *nvgpu_gr_ctx_get_gfxp_rtvcb_ctxsw_buffer(
-	struct nvgpu_gr_ctx *gr_ctx);
-
 void nvgpu_gr_ctx_set_preemption_buffer_va(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
 
 bool nvgpu_gr_ctx_desc_force_preemption_gfxp(
 	struct nvgpu_gr_ctx_desc *gr_ctx_desc);
@@ -559,12 +566,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 #endif /* CONFIG_NVGPU_GRAPHICS */
 
 #ifdef CONFIG_NVGPU_DEBUGGER
-int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g,
-	struct nvgpu_gr_ctx *gr_ctx,
+int nvgpu_gr_ctx_alloc_map_pm_ctx(struct gk20a *g,
+	struct nvgpu_tsg *tsg,
 	struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct vm_gk20a *vm);
-void nvgpu_gr_ctx_free_pm_ctx(struct gk20a *g, struct vm_gk20a *vm,
-	struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_hwpm_map *hwpm_map);
 
 void nvgpu_gr_ctx_reset_patch_count(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx);
@@ -573,18 +578,19 @@ void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
 u32 nvgpu_gr_ctx_read_ctx_id(struct nvgpu_gr_ctx *gr_ctx);
 
-struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
-
 void nvgpu_gr_ctx_set_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx, u32 pm_mode);
 u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx);
 
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool enable);
 
-int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
-	u32 mode, bool *skip_update);
+int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	u32 mode, u64 *pm_ctx_gpu_va, bool *skip_update);
 void nvgpu_gr_ctx_set_hwpm_pm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
-void
nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_ctx_set_hwpm_ptr(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u64 pm_ctx_gpu_va); +void nvgpu_gr_ctx_set_pm_ctx_mapped(struct nvgpu_gr_ctx *ctx, bool mapped); #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING void nvgpu_gr_ctx_set_boosted_ctx(struct nvgpu_gr_ctx *gr_ctx, bool boost); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h new file mode 100644 index 000000000..0ce3e54a4 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx_mappings.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CTX_MAPPINGS_H +#define NVGPU_GR_CTX_MAPPINGS_H + +struct gk20a; +struct nvgpu_tsg; +struct vm_gk20a; +struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; +struct nvgpu_gr_global_ctx_buffer_desc; + +/** + * @brief Create GR ctx buffers mappings for a TSG. + * + * @param g [in] Pointer to GPU driver struct. + * @param tsg [in] Pointer to TSG struct. + * @param vm [in] Pointer to vm struct. + * + * This function allocates the mappings struct for TSG corresponding to + * given vm. + * + * @return mappings struct in case of success, null in case of failure. + */ +struct nvgpu_gr_ctx_mappings *nvgpu_gr_ctx_mappings_create(struct gk20a *g, + struct nvgpu_tsg *tsg, struct vm_gk20a *vm); + +/** + * @brief Free the GR ctx buffers mappings. + * + * @param g [in] Pointer to GPU driver struct. + * @param mappings [in] Pointer to GR ctx buffers mappings struct. + * + * This function frees the mappings struct. + */ +void nvgpu_gr_ctx_mappings_free(struct gk20a *g, + struct nvgpu_gr_ctx_mappings *mappings); + +/** + * @brief Map GR context buffer and store in mappings struct. + * + * @param g [in] Pointer to GPU driver struct. + * @param ctx [in] Pointer to GR context struct. + * @param index [in] index of the buffer. + * @param mappings [in] Pointer to GR context buffer mappings struct. + * + * This function will map the GR context buffer at #index in #mappings->vm + * and stores the mapped address. + * + * @return 0 in case of success, < 0 in case of failure. + */ +int nvgpu_gr_ctx_mappings_map_ctx_buffer(struct gk20a *g, + struct nvgpu_gr_ctx *ctx, u32 index, + struct nvgpu_gr_ctx_mappings *mappings); + +/** + * @brief Map GR context preemption buffers and store in mappings struct. 
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param ctx [in]			Pointer to GR context struct.
+ * @param mappings [in]		Pointer to GR context buffer mappings struct.
+ *
+ * This function will map the GR context preemption buffers in #mappings->vm
+ * and store the mapped addresses.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_gr_ctx_mappings_map_ctx_preemption_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
+
+/**
+ * @brief Map GR and global context buffers and store in mappings struct.
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param gr_ctx [in]			Pointer to GR context struct.
+ * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param vpr [in]			Indicates if VPR buffer copy is to be
+ *					mapped.
+ *
+ * This function will map the GR and global context buffers in #mappings->vm
+ * and store the mapped addresses.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ */
+int nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx_mappings *mappings,
+	bool vpr);
+
+/**
+ * @brief Unmap GR and global context buffers tracked in the mappings struct.
+ *
+ * @param g [in]			Pointer to GPU driver struct.
+ * @param gr_ctx [in]			Pointer to GR context struct.
+ * @param global_ctx_buffer [in]	Pointer to global context buffer desc.
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ *
+ * This function will unmap the GR and global context buffers from
+ * #mappings->vm.
+ */
+void nvgpu_gr_ctx_unmap_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
+	struct nvgpu_gr_ctx_mappings *mappings);
+
+/**
+ * @brief Get global context buffer gpu virtual address.
+ *
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param index [in]			Index of the buffer.
+ *
+ * This function will get the gpu virtual address of the global context buffer
+ * in #mappings.
+ *
+ * @return gpu virtual address of global context buffer.
+ */
+u64 nvgpu_gr_ctx_mappings_get_global_ctx_va(struct nvgpu_gr_ctx_mappings *mappings,
+	u32 index);
+
+/**
+ * @brief Get GR context buffer gpu virtual address.
+ *
+ * @param mappings [in]		Pointer to GR context buffer
+ *					mappings struct.
+ * @param index [in]			Index of the buffer.
+ *
+ * This function will get the gpu virtual address of the GR context buffer
+ * in #mappings.
+ *
+ * @return gpu virtual address of GR context buffer.
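The bulk map/unmap pair above is symmetric; on teardown the per-VM mappings go first, then the backing allocations. A sketch with all pointers assumed valid; the wrapper is illustrative.

static void example_teardown_ctx(struct gk20a *g,
		struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Undo nvgpu_gr_ctx_mappings_map_gr_ctx_buffers()... */
	nvgpu_gr_ctx_unmap_buffers(g, gr_ctx, global_ctx_buffer, mappings);

	/* ...then release the now-unmapped backing memory. */
	nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
}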
+ */ +u64 nvgpu_gr_ctx_mappings_get_ctx_va(struct nvgpu_gr_ctx_mappings *mappings, + u32 index); + +#endif /* NVGPU_GR_CTX_MAPPINGS_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h index 2efdab06f..d4a305682 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h @@ -63,6 +63,7 @@ struct gk20a; struct nvgpu_mem; struct nvgpu_gr_subctx; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_tsg; struct vm_area_struct; @@ -169,7 +170,8 @@ int nvgpu_gr_fecs_trace_reset(struct gk20a *g); int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g, struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx, - struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid); + struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings, + pid_t pid, u32 vmid); int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g, struct nvgpu_mem *inst_block); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h index b2cb34a43..1ac757de8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/global_ctx.h @@ -181,6 +181,18 @@ size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g, struct nvgpu_gr_global_ctx_buffer_desc *desc); +/** + * @brief Initialize mapping flags for GR global context buffers. + * + * @param g [in] Pointer to GPU driver struct. + * @param desc [in] Pointer to global ctx buffer desc. + * + * This function initializes cacheability attribute for GR global + * context buffers. + */ +void nvgpu_gr_global_ctx_init_ctx_buffers_mapping_flags(struct gk20a *g, + struct nvgpu_gr_global_ctx_buffer_desc *desc); + /** * @brief Free all global context buffers. * @@ -199,7 +211,6 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g, * @param desc [in] Pointer to global context descriptor struct. * @param index [in] Index of global context buffer. * @param vm [in] Pointer to virtual memory. - * @param flags [in] Flags used to specify mapping attributes. * @param priv [in] Boolean flag to allocate privileged PTE. * * This function maps given global contex buffer with index #index into @@ -209,8 +220,7 @@ void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g, * 0 in case of failure. */ u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc, - u32 index, - struct vm_gk20a *vm, u32 flags, bool priv); + u32 index, struct vm_gk20a *vm, bool priv); /** * @brief Unmap given global context buffer. diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h index 4649f9960..d0a013bfe 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/obj_ctx.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -33,6 +33,7 @@ */ struct gk20a; struct nvgpu_gr_ctx; +struct nvgpu_gr_ctx_mappings; struct nvgpu_gr_subctx; struct nvgpu_gr_config; struct nvgpu_gr_ctx_desc; @@ -70,7 +71,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g, * @param inst_block [in] Pointer to channel instance block. * @param gr_ctx [in] Pointer to graphics context buffer. 
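The fecs_trace change earlier in this series follows the same thread-the-mappings rule: when NVGPU_FECS_TRACE_VA is enabled, the trace buffer VA is read from the mappings struct inside bind_channel, so the setup path passes the struct along. A sketch of the call-site shape, mirroring nvgpu_gr_setup_alloc_obj_ctx(); the wrapper and the vmid value 0 are illustrative.

static int example_bind_fecs_trace(struct gk20a *g, struct nvgpu_channel *c,
		struct nvgpu_tsg *tsg, struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	if (g->ops.gr.fecs_trace.bind_channel == NULL) {
		return 0;
	}

	return g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
			c->subctx, gr_ctx, mappings, tsg->tgid, 0U);
}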
 * @param subctx [in] Pointer to graphics subcontext buffer.
- * @param gpu_va [in] GPU virtual address of graphics context buffer.
+ * @param mappings [in] Pointer to mappings of the GR context buffers.
 *
 * If graphics subcontexts are supported, subcontext buffer GPU virtual
 * address should be committed to channel instance block. Otherwise graphics
@@ -82,7 +83,7 @@ void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
 */
 void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
-	u64 gpu_va);
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Initialize preemption mode in context struct.
@@ -91,7 +92,6 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx_desc [in] Pointer to GR context descriptor struct.
 * @param gr_ctx [in] Pointer to graphics context.
- * @param vm [in] Pointer to virtual memory.
 * @param class_num [in] GR engine class.
 * @param graphics_preempt_mode Graphics preemption mode to set.
 * @param compute_preempt_mode Compute preemption mode to set.
@@ -111,7 +111,7 @@ void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
 */
 int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
 	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
-	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num,
+	struct nvgpu_gr_ctx *gr_ctx, u32 class_num,
 	u32 graphics_preempt_mode, u32 compute_preempt_mode);

 /**
@@ -121,6 +121,7 @@ int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
 * @param subctx [in] Pointer to graphics subcontext buffer.
+ * @param mappings [in] Pointer to mappings of GR context buffers.
 *
 * This function will read preemption modes stored in #nvgpu_gr_ctx
 * struct and write them into graphics context image.
@@ -133,7 +134,8 @@
 */
 void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
 	struct nvgpu_gr_config *config,
-	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
+	struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Update global context buffer addresses in graphics context.
@@ -142,6 +144,7 @@ void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
 * @param global_ctx_buffer [in] Pointer to global context descriptor struct.
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
+ * @param mappings [in] Pointer to mappings of GR context buffers.
 * @param patch [in] Boolean flag to use patch context buffer.
 *
 * This function will update GPU virtual addresses of global context
@@ -152,7 +155,8 @@
 */
 void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
-	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch);
+	struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings, bool patch);
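/*
 * Illustrative sketch, not part of the patch: with the reworked obj_ctx API
 * a caller hands the nvgpu_gr_ctx_mappings object to commit_inst instead of
 * a raw GPU VA, and looks per-buffer VAs up on demand. The wrapper name is
 * hypothetical, and the assumption that nvgpu_gr_ctx_mappings_get_ctx_va()
 * is indexed by the NVGPU_GR_CTX_* IDs follows the other call sites in this
 * patch.
 */
static void sketch_commit_ctx(struct gk20a *g, struct nvgpu_mem *inst_block,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx_mappings *mappings)
{
	u64 ctx_va;

	/* Before this patch the caller passed gpu_va as the last argument,
	 * so the VA had to be threaded through every layer above.
	 */
	nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx, mappings);

	/* The GR ctx VA can still be queried per buffer index when needed. */
	ctx_va = nvgpu_gr_ctx_mappings_get_ctx_va(mappings, NVGPU_GR_CTX_CTX);
	(void)ctx_va;
}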

 /**
 * @brief Allocate golden context image.
@@ -193,6 +197,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
 	struct nvgpu_gr_config *config,
 	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings,
 	struct nvgpu_mem *inst_block);

 /**
@@ -205,7 +210,7 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 * @param config [in] Pointer to GR configuration struct.
 * @param gr_ctx [in] Pointer to graphics context.
 * @param subctx [in] Pointer to graphics subcontext buffer.
- * @param vm [in] Pointer to virtual memory.
+ * @param mappings [in] Pointer to mappings of the GR context buffers.
 * @param inst_block [in] Pointer to channel instance block.
 * @param class_num [in] GR engine class.
 * @param flags [in] Object context attribute flags.
@@ -216,8 +221,8 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
 * This function allocates object context for the GPU channel.
 * Allocating object context includes:
 *
- * - Allocating graphics context buffer. See #nvgpu_gr_obj_ctx_gr_ctx_alloc().
- * - Allocating patch context buffer. See #nvgpu_gr_ctx_alloc_patch_ctx().
+ * - Allocating graphics context buffers.
+ * - Allocating patch context buffer.
 * - Allocating golden context image. See #nvgpu_gr_obj_ctx_alloc_golden_ctx_image().
 * - Committing global context buffers in graphics context image.
 *   See #nvgpu_gr_obj_ctx_commit_global_ctx_buffers().
@@ -245,7 +250,7 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
 	struct nvgpu_gr_config *config,
 	struct nvgpu_gr_ctx *gr_ctx,
 	struct nvgpu_gr_subctx *subctx,
-	struct vm_gk20a *vm,
+	struct nvgpu_gr_ctx_mappings *mappings,
 	struct nvgpu_mem *inst_block,
 	u32 class_num, u32 flags,
 	bool cde, bool vpr);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
index 34e9a9553..4b6ed33d0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/setup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -88,14 +88,13 @@ int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
 * @brief Free GR engine context image.
 *
 * @param g [in] Pointer to GPU driver struct.
- * @param vm [in] Pointer to virtual memory.
 * @param gr_ctx [in] Pointer to GR engine context image.
 *
 * This function will free memory allocated for patch context image and
 * GR engine context image in #nvgpu_gr_setup_alloc_obj_ctx().
 */
 void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
-	struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_ctx *gr_ctx);

 /**
 * @brief Free GR engine subcontext.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
index 75049cf5a..8739a165b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h
@@ -34,6 +34,7 @@ struct gk20a;
 struct vm_gk20a;
 struct nvgpu_gr_subctx;
 struct nvgpu_mem;
+struct nvgpu_gr_ctx_mappings;

 /**
 * @brief Allocate graphics subcontext buffer.
@@ -73,7 +74,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
 * @param g [in] Pointer to GPU driver struct.
 * @param subctx [in] Pointer to graphics subcontext struct.
 * @param gr_ctx [in] Pointer to graphics context struct.
- * @param gpu_va [in] GPU virtual address of graphics context buffer.
+ * @param mappings [in] GPU virtual address mappings of graphics
+ *                 context buffers.
 *
 * This function will initialize graphics subcontext buffer header
 * by reading appropriate values from #nvgpu_gr_ctx structure and
@@ -84,7 +86,8 @@ void nvgpu_gr_subctx_free(struct gk20a *g,
 */
 void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_gr_subctx *subctx,
-	struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va);
+	struct nvgpu_gr_ctx *gr_ctx,
+	struct nvgpu_gr_ctx_mappings *mappings);

 /**
 * @brief Get pointer of subcontext header memory struct.
@@ -103,11 +106,12 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx
 	struct nvgpu_gr_ctx *gr_ctx);

 void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_subctx *subctx,
+	struct nvgpu_gr_ctx_mappings *mappings);
 #endif

 #ifdef CONFIG_NVGPU_DEBUGGER
 void nvgpu_gr_subctx_set_hwpm_ptr(struct gk20a *g,
-	struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx);
+	struct nvgpu_gr_subctx *subctx, u64 pm_ctx_gpu_va);
 #endif

 #endif /* NVGPU_GR_SUBCTX_H */
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index e81e05a94..06679747c 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1872,7 +1872,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context_size(struct dbg_session_gk20a *dbg
 		return -EINVAL;
 	}

-	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
 	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
 		nvgpu_err(g, "invalid context mem");
 		return -EINVAL;
@@ -1918,7 +1918,7 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}

-	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx);
+	ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
 	if (ctx_mem == NULL || !nvgpu_mem_is_valid(ctx_mem)) {
 		nvgpu_err(g, "invalid context mem");
 		return -EINVAL;
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index 62fdb9f63..09b66ef82 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -29,7 +29,6 @@
 struct gk20a;
 struct nvgpu_channel;
-struct gr_ctx_buffer_desc;
 struct gk20a_scale_profile;

 struct secure_page_buffer {
diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export
index 2a7e0b6d5..fd477b3e7 100644
--- a/libs/dgpu/libnvgpu-drv-dgpu_safe.export
+++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export
@@ -446,13 +446,10 @@ nvgpu_gr_config_set_sm_info_gpc_index
 nvgpu_gr_config_set_sm_info_sm_index
 nvgpu_gr_config_set_sm_info_tpc_index
 nvgpu_gr_ctx_alloc
-nvgpu_gr_ctx_alloc_patch_ctx
 nvgpu_gr_ctx_desc_alloc
 nvgpu_gr_ctx_desc_free
 nvgpu_gr_ctx_free
-nvgpu_gr_ctx_free_patch_ctx
 nvgpu_gr_ctx_get_tsgid
-nvgpu_gr_ctx_map_global_ctx_buffers
 nvgpu_gr_ctx_patch_write
 nvgpu_gr_ctx_patch_write_begin
 nvgpu_gr_ctx_patch_write_end
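/*
 * Illustrative sketch, not part of the patch: the subctx header is now
 * populated from the mappings object rather than from a single gpu_va.
 * The wrapper is hypothetical; the CONFIG_NVGPU_GFXP guard around the
 * preemption-buffer call is an assumption inferred from the #endif visible
 * in the subctx.h hunk above.
 */
static void sketch_setup_subctx(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_ctx_mappings *mappings)
{
	/* Before: nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va); */
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, mappings);
#ifdef CONFIG_NVGPU_GFXP
	/* Preemption buffer VAs are also taken from the mappings object now. */
	nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx, mappings);
#endif
}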
diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export
index be292eb22..53699cc81 100644
--- a/libs/igpu/libnvgpu-drv-igpu_safe.export
+++ b/libs/igpu/libnvgpu-drv-igpu_safe.export
@@ -462,18 +462,20 @@ nvgpu_gr_config_set_sm_info_global_tpc_index
 nvgpu_gr_config_set_sm_info_gpc_index
 nvgpu_gr_config_set_sm_info_sm_index
 nvgpu_gr_config_set_sm_info_tpc_index
-nvgpu_gr_ctx_alloc
-nvgpu_gr_ctx_alloc_patch_ctx
 nvgpu_gr_ctx_desc_alloc
 nvgpu_gr_ctx_desc_free
 nvgpu_gr_ctx_free
-nvgpu_gr_ctx_free_patch_ctx
 nvgpu_gr_ctx_get_tsgid
-nvgpu_gr_ctx_map_global_ctx_buffers
 nvgpu_gr_ctx_patch_write
 nvgpu_gr_ctx_patch_write_begin
 nvgpu_gr_ctx_patch_write_end
 nvgpu_gr_ctx_set_size
+nvgpu_gr_ctx_alloc_ctx_buffers
+nvgpu_gr_ctx_free_ctx_buffers
+nvgpu_gr_ctx_mappings_create
+nvgpu_gr_ctx_alloc_or_get_mappings
+nvgpu_gr_ctx_mappings_map_gr_ctx_buffers
+nvgpu_gr_ctx_get_ctx_mem
 nvgpu_gr_enable_hw
 nvgpu_gr_engine_interrupt_mask
 nvgpu_gr_falcon_get_fecs_ucode_segments
diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.c b/userspace/units/fifo/tsg/nvgpu-tsg.c
index a7d8e4adf..601b289a6 100644
--- a/userspace/units/fifo/tsg/nvgpu-tsg.c
+++ b/userspace/units/fifo/tsg/nvgpu-tsg.c
@@ -622,10 +622,9 @@ done:
 #define F_TSG_RELEASE_NO_RELEASE_HAL	BIT(0)
 #define F_TSG_RELEASE_GR_CTX		BIT(1)
 #define F_TSG_RELEASE_MEM		BIT(2)
-#define F_TSG_RELEASE_VM		BIT(3)
-#define F_TSG_RELEASE_ENG_BUFS		BIT(4)
-#define F_TSG_RELEASE_SM_ERR_STATES	BIT(5)
-#define F_TSG_RELEASE_LAST		BIT(6)
+#define F_TSG_RELEASE_ENG_BUFS		BIT(3)
+#define F_TSG_RELEASE_SM_ERR_STATES	BIT(4)
+#define F_TSG_RELEASE_LAST		BIT(5)

 static void stub_tsg_release(struct nvgpu_tsg *tsg)
@@ -640,7 +639,7 @@ static void stub_tsg_deinit_eng_method_buffers(struct gk20a *g,
 }

 static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
-		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+		struct nvgpu_gr_ctx *gr_ctx)
 {
	stub[1].name = __func__;
	stub[1].count++;
@@ -650,24 +649,32 @@ static void stub_gr_setup_free_gr_ctx(struct gk20a *g,
 int test_tsg_release(struct unit_module *m,
		struct gk20a *g, void *args)
 {
+	struct nvgpu_gr_ctx_desc *gr_ctx_desc;
+	struct nvgpu_mem *gr_ctx_mem;
	struct nvgpu_fifo *f = &g->fifo;
	struct gpu_ops gops = g->ops;
	struct nvgpu_tsg *tsg = NULL;
	struct vm_gk20a vm;
	u32 branches = 0U;
	int ret = UNIT_FAIL;
-	struct nvgpu_mem mem;
	u32 free_gr_ctx_mask =
-		F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM|F_TSG_RELEASE_VM;
+		F_TSG_RELEASE_GR_CTX|F_TSG_RELEASE_MEM;
	const char *labels[] = {
		"no_release_hal",
		"gr_ctx",
		"mem",
-		"vm",
		"eng_bufs",
		"sm_err_states"
	};

+	gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g);
+	if (!gr_ctx_desc) {
+		unit_return_fail(m, "failed to allocate memory");
+	}
+
+	nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
+		NVGPU_CPU_PAGE_SIZE);
+
	for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {

		if (!(branches & F_TSG_RELEASE_GR_CTX) &&
@@ -683,8 +690,9 @@ int test_tsg_release(struct unit_module *m,
		tsg = nvgpu_tsg_open(g, getpid());
		unit_assert(tsg != NULL, goto done);
		unit_assert(tsg->gr_ctx != NULL, goto done);
-		unit_assert(tsg->gr_ctx->mem.aperture ==
-				APERTURE_INVALID, goto done);
+
+		gr_ctx_mem = nvgpu_gr_ctx_get_ctx_mem(tsg->gr_ctx, NVGPU_GR_CTX_CTX);
+		unit_assert(gr_ctx_mem->aperture == APERTURE_INVALID, goto done);

		g->ops.tsg.release = branches & F_TSG_RELEASE_NO_RELEASE_HAL ?
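/*
 * Note on the test idiom above, illustration only: test_tsg_release() walks
 * every combination of the F_TSG_RELEASE_* bits, so removing
 * F_TSG_RELEASE_VM and renumbering the remaining bits keeps the loop bound
 * F_TSG_RELEASE_LAST dense. Reduced sketch of the pattern:
 */
static void sketch_branch_walk(void)
{
	u32 branches;

	for (branches = 0U; branches < F_TSG_RELEASE_LAST; branches++) {
		if ((branches & F_TSG_RELEASE_GR_CTX) != 0U) {
			/* exercise the gr_ctx teardown path */
		}
		if ((branches & F_TSG_RELEASE_MEM) != 0U) {
			/* allocate real ctx buffers so release frees them */
		}
	}
}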
@@ -696,11 +704,8 @@ int test_tsg_release(struct unit_module *m,
		}

		if (branches & F_TSG_RELEASE_MEM) {
-			nvgpu_dma_alloc(g, NVGPU_CPU_PAGE_SIZE, &mem);
-			tsg->gr_ctx->mem = mem;
-		}
-
-		if (branches & F_TSG_RELEASE_VM) {
+			ret = nvgpu_gr_ctx_alloc_ctx_buffers(g, gr_ctx_desc, tsg->gr_ctx);
+			unit_assert(ret == UNIT_SUCCESS, goto done);
			tsg->vm = &vm;
			/* prevent nvgpu_vm_remove */
			nvgpu_ref_init(&vm.ref);
@@ -734,7 +739,7 @@ int test_tsg_release(struct unit_module *m,
			gops.gr.setup.free_gr_ctx;

		if (branches & F_TSG_RELEASE_MEM) {
-			nvgpu_dma_free(g, &mem);
+			nvgpu_gr_ctx_free_ctx_buffers(g, tsg->gr_ctx);
		}

		if (tsg->gr_ctx != NULL) {
diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.h b/userspace/units/fifo/tsg/nvgpu-tsg.h
index 4cbebfc09..64f50b9fc 100644
--- a/userspace/units/fifo/tsg/nvgpu-tsg.h
+++ b/userspace/units/fifo/tsg/nvgpu-tsg.h
@@ -177,17 +177,19 @@ int test_tsg_unbind_channel(struct unit_module *m,
 * - Check that in_use is false.
 * - Check de-allocation of other resources:
 *   - Case where g->ops.gr.setup.free_gr_ctx is called.
- *     It requires dummy vm, gr_ctx and gr_ctx->mem to be allocated.
+ *     It requires dummy vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX] to be
+ *     allocated.
 *     A stub is used to check that the HAL was actually invoked.
- *   - Other combinations of vm, gr_ctx and gr_ctx->mem allocations, to
- *     check that g->ops.gr.setup.free_gr_ctx is not called.
+ *   - Other combinations of vm, gr_ctx and gr_ctx->mem[NVGPU_GR_CTX_CTX]
+ *     allocations, to check that g->ops.gr.setup.free_gr_ctx is not called.
 *   - Unhook of event_ids (by adding 2 dummy events in event_id list, and
 *     checking that list is empty after TSG release).
 *   - Case where event_id is empty before TSG release is tested as well
 *   - Check that VM refcount is decremented (and VM deallocated in our
 *     case), when present.
 *   - Check that sm_error_states is deallocated.
- *   - Check any combination of VM, gr_ctx, gr_ctx->mem, and sm_error_state.
+ *   - Check any combination of VM, gr_ctx, gr_ctx->mem[NVGPU_GR_CTX_CTX], and
+ *     sm_error_state.
 *
 * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
 */
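/*
 * Illustrative sketch, not part of the patch: gr_ctx->mem is now an array
 * indexed by the NVGPU_GR_CTX_* IDs, and callers use the accessor instead
 * of touching the struct directly, as the updated test and ioctl paths
 * above do. Hypothetical helper:
 */
static bool sketch_ctx_image_valid(struct nvgpu_gr_ctx *gr_ctx)
{
	struct nvgpu_mem *ctx_mem =
		nvgpu_gr_ctx_get_ctx_mem(gr_ctx, NVGPU_GR_CTX_CTX);

	return (ctx_mem != NULL) && nvgpu_mem_is_valid(ctx_mem);
}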
diff --git a/userspace/units/gr/ctx/nvgpu-gr-ctx.c b/userspace/units/gr/ctx/nvgpu-gr-ctx.c
index 56bd01902..05b20b8e5 100644
--- a/userspace/units/gr/ctx/nvgpu-gr-ctx.c
+++ b/userspace/units/gr/ctx/nvgpu-gr-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -43,6 +44,37 @@

 #define DUMMY_SIZE 0xF0U

+static u64 nvgpu_gmmu_map_locked_stub(struct vm_gk20a *vm,
+	u64 vaddr,
+	struct nvgpu_sgt *sgt,
+	u64 buffer_offset,
+	u64 size,
+	u32 pgsz_idx,
+	u8 kind_v,
+	u32 ctag_offset,
+	u32 flags,
+	enum gk20a_mem_rw_flag rw_flag,
+	bool clear_ctags,
+	bool sparse,
+	bool priv,
+	struct vm_gk20a_mapping_batch *batch,
+	enum nvgpu_aperture aperture)
+{
+	return 1;
+}
+
+static void nvgpu_gmmu_unmap_locked_stub(struct vm_gk20a *vm,
+	u64 vaddr,
+	u64 size,
+	u32 pgsz_idx,
+	bool va_allocated,
+	enum gk20a_mem_rw_flag rw_flag,
+	bool sparse,
+	struct vm_gk20a_mapping_batch *batch)
+{
+	return;
+}
+
 int test_gr_ctx_error_injection(struct unit_module *m,
		struct gk20a *g, void *args)
 {
@@ -51,12 +83,22 @@ int test_gr_ctx_error_injection(struct unit_module *m,
	struct vm_gk20a *vm;
	struct nvgpu_gr_ctx_desc *desc;
	struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
+	struct nvgpu_gr_ctx_mappings *mappings = NULL;
	struct nvgpu_gr_ctx *gr_ctx = NULL;
	struct nvgpu_posix_fault_inj *dma_fi =
		nvgpu_dma_alloc_get_fault_injection();
	struct nvgpu_posix_fault_inj *kmem_fi =
		nvgpu_kmem_get_fault_injection();
	u64 low_hole = SZ_4K * 16UL;
+	struct nvgpu_channel *channel = (struct nvgpu_channel *)
+		malloc(sizeof(struct nvgpu_channel));
+	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
+		malloc(sizeof(struct nvgpu_tsg));
+	u32 i;
+
+	if (channel == NULL || tsg == NULL) {
+		unit_return_fail(m, "failed to allocate channel/tsg");
+	}

	desc = nvgpu_gr_ctx_desc_alloc(g);
	if (!desc) {
@@ -84,68 +126,70 @@ int test_gr_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "nvgpu_vm_init failed\n");
	}

-	/* Try to free gr_ctx before it is allocated. */
-	nvgpu_gr_ctx_free(g, gr_ctx, NULL, NULL);
+	channel->g = g;
+	channel->vm = vm;

-	gr_ctx = nvgpu_alloc_gr_ctx_struct(g);
-	if (!gr_ctx) {
-		unit_return_fail(m, "failed to allocate memory");
-	}
-
-	/* Context size is not set, so should fail. */
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Set the size now, but inject dma allocation failures. */
-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
-	nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Inject kmem alloc failures to trigger mapping failures */
-	nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 1);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Successful allocation */
-	nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate context");
-	}
-
-	/* Try to free patch context before it is allocated. */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
-
-	/* Inject allocation error and allocate patch context */
-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
-	nvgpu_posix_enable_fault_injection(dma_fi, true, 0);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Successful allocation */
-	nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate patch context");
-	}
+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked_stub;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked_stub;

	global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
	if (!global_desc) {
		unit_return_fail(m, "failed to allocate desc");
	}

-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
+	/* Try to free gr_ctx before it is allocated. */
+	nvgpu_gr_ctx_free(g, gr_ctx, NULL);
+
+	gr_ctx = nvgpu_alloc_gr_ctx_struct(g);
+	if (!gr_ctx) {
+		unit_return_fail(m, "failed to allocate memory");
+	}
+
+	tsg->gr_ctx = gr_ctx;
+
+	mappings = nvgpu_gr_ctx_alloc_or_get_mappings(g, tsg, vm);
+	if (mappings == NULL) {
+		unit_return_fail(m, "failed to allocate gr_ctx mappings");
+	}
+
+	/* Context size is not set, so should fail. */
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+	if (err == 0) {
+		unit_return_fail(m, "unexpected success");
+	}
+
+	/* Set the size now, but inject dma allocation failures. */
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
+
+	for (i = 0; i < 2; i++) {
+		nvgpu_posix_enable_fault_injection(dma_fi, true, i);
+		err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(dma_fi, false, 0);
+	}
+
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
+	if (err != 0) {
+		unit_return_fail(m, "failed to allocate ctx buffers");
+	}
+
+	/* Inject kmem alloc failures to trigger mapping failures */
+	for (i = 0; i < 2; i++) {
+		nvgpu_posix_enable_fault_injection(kmem_fi, true, 2 * i);
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx,
+			global_desc, mappings, false);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
+	}
+
+	/* global ctx_desc size is not set. */
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+		mappings, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
	}
@@ -164,42 +208,21 @@ int test_gr_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate global buffers");
	}

-	/* Fail global circular buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
+	/* Fail global ctx buffer mappings */
+	for (i = 0; i < 4; i++) {
+		nvgpu_posix_enable_fault_injection(kmem_fi, true, 4 + (2 * i));
+		err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+			mappings, false);
+		if (err == 0) {
+			unit_return_fail(m, "unexpected success");
+		}
+		nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
	}

-	/* Fail global attribute buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 4);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Fail global pagepool buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}
-
-	/* Fail global access map buffer mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, true, 12);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
-	if (err == 0) {
-		unit_return_fail(m, "unexpected success");
-	}

	/* Successful mapping */
-	nvgpu_posix_enable_fault_injection(kmem_fi, false, 0);
-	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx, global_desc,
-			vm, false);
+	err = nvgpu_gr_ctx_mappings_map_gr_ctx_buffers(g, gr_ctx, global_desc,
+		mappings, false);
	if (err != 0) {
		unit_return_fail(m, "failed to map global buffers");
	}
@@ -225,11 +248,9 @@ int test_gr_ctx_error_injection(struct unit_module *m,
	nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);

	/* cleanup */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
-	nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm);
+	nvgpu_gr_ctx_free(g, gr_ctx, global_desc);
	nvgpu_free_gr_ctx_struct(g, gr_ctx);
	nvgpu_gr_ctx_desc_free(g, desc);
-	nvgpu_vm_put(vm);
	nvgpu_vm_put(g->mm.bar1.vm);

	return UNIT_SUCCESS;
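/*
 * Note on the fault-injection idiom above, illustration only: the posix
 * fault-injection helpers start failing at the Nth allocation after being
 * armed, so looping over increasing counts visits each allocation made
 * inside the call under test, one failure path per iteration. Hypothetical
 * reduced form:
 */
static int sketch_fault_walk(struct gk20a *g, struct nvgpu_gr_ctx_desc *desc,
		struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_posix_fault_inj *fi)
{
	u32 i;
	int err;

	for (i = 0U; i < 2U; i++) {
		nvgpu_posix_enable_fault_injection(fi, true, i);
		err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
		if (err == 0) {
			return -1; /* injected failure was not hit */
		}
		nvgpu_posix_enable_fault_injection(fi, false, 0);
	}

	return 0;
}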
diff --git a/userspace/units/gr/ctx/nvgpu-gr-ctx.h b/userspace/units/gr/ctx/nvgpu-gr-ctx.h
index d4bd6efe5..a25f626fc 100644
--- a/userspace/units/gr/ctx/nvgpu-gr-ctx.h
+++ b/userspace/units/gr/ctx/nvgpu-gr-ctx.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -40,16 +40,15 @@ struct unit_module;
 *
 * Test Type: Feature, Error guessing
 *
- * Targets: #nvgpu_gr_ctx_alloc,
+ * Targets: #nvgpu_gr_ctx_alloc_ctx_buffers,
+ * #nvgpu_gr_ctx_free_ctx_buffers,
 * #nvgpu_gr_ctx_free,
 * #nvgpu_gr_ctx_desc_alloc,
 * #nvgpu_gr_ctx_desc_free,
 * #nvgpu_alloc_gr_ctx_struct,
 * #nvgpu_free_gr_ctx_struct,
 * #nvgpu_gr_ctx_set_size,
- * #nvgpu_gr_ctx_alloc_patch_ctx,
- * #nvgpu_gr_ctx_free_patch_ctx,
- * #nvgpu_gr_ctx_map_global_ctx_buffers,
+ * #nvgpu_gr_ctx_mappings_map_global_ctx_buffers,
 * #nvgpu_gr_ctx_patch_write_begin,
 * #nvgpu_gr_ctx_patch_write,
 * #nvgpu_gr_ctx_patch_write_end.
@@ -63,7 +62,6 @@ struct unit_module;
 * - Inject dma allocation failure and try to allocate gr_ctx, should fail.
 * - Inject kmem allocation failure and try to allocate gr_ctx, should fail.
 * - Disable error injection and allocate gr_ctx, should pass.
- * - Try to free patch_ctx before it is allocated, should fail.
 * - Inject dma allocation failure and try to allocate patch_ctx, should fail.
 * - Disable error injection and allocate patch_ctx, should pass.
 * - Setup all the global context buffers.
diff --git a/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c b/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
index eea5e03d3..87fb9cc21 100644
--- a/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
+++ b/userspace/units/gr/global_ctx/nvgpu-gr-global-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -100,7 +100,7 @@ int test_gr_global_ctx_alloc_error_injection(struct unit_module *m,

	/* Ensure mapping fails before buffers are allocated */
	gpu_va = nvgpu_gr_global_ctx_buffer_map(desc,
-			NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, 0, false);
+			NVGPU_GR_GLOBAL_CTX_CIRCULAR, NULL, false);
	if (gpu_va != 0) {
		unit_return_fail(m, "unexpected success");
	}
diff --git a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
index 273f15083..966a39ea8 100644
--- a/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
+++ b/userspace/units/gr/init/nvgpu-gr-init-hal-gv11b.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -757,17 +757,12 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
	}

	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
-	err = nvgpu_gr_ctx_alloc(g, gr_ctx, desc, vm);
+	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
+	err = nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
	if (err != 0) {
		unit_return_fail(m, "failed to allocate context");
	}

-	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);
-	err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, desc, vm);
-	if (err != 0) {
-		unit_return_fail(m, "failed to allocate patch context");
-	}
-
	/* global_ctx = false and arbitrary size */
	g->ops.gr.init.commit_global_pagepool(g, gr_ctx, 0x12345678,
			DUMMY_SIZE, false, false);
@@ -803,7 +798,7 @@ int test_gr_init_hal_error_injection(struct unit_module *m,
	g->ops = gops;

	/* cleanup */
-	nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
+	nvgpu_gr_ctx_free_ctx_buffers(g, gr_ctx);
	nvgpu_free_gr_ctx_struct(g, gr_ctx);
	nvgpu_gr_ctx_desc_free(g, desc);
	nvgpu_vm_put(vm);
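/*
 * Illustrative sketch, not part of the patch: the separate
 * nvgpu_gr_ctx_alloc() and nvgpu_gr_ctx_alloc_patch_ctx() calls collapse
 * into one pass over the descriptor's size table, so a test sets the sizes
 * it needs and allocates once; unset (zero) sizes are skipped. Hypothetical
 * reduced form:
 */
static int sketch_alloc_all(struct gk20a *g, struct nvgpu_gr_ctx_desc *desc,
		struct nvgpu_gr_ctx *gr_ctx)
{
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_CTX, DUMMY_SIZE);
	nvgpu_gr_ctx_set_size(desc, NVGPU_GR_CTX_PATCH_CTX, DUMMY_SIZE);

	return nvgpu_gr_ctx_alloc_ctx_buffers(g, desc, gr_ctx);
}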
diff --git a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
index 90f286dbc..e8a5e5a62 100644
--- a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
+++ b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -117,6 +118,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	struct nvgpu_gr_ctx_desc *desc;
	struct nvgpu_gr_global_ctx_buffer_desc *global_desc;
	struct nvgpu_gr_ctx *gr_ctx = NULL;
+	struct nvgpu_gr_ctx_mappings *mappings = NULL;
	struct nvgpu_gr_subctx *subctx = NULL;
	struct nvgpu_mem inst_block;
	struct nvgpu_gr_config *config = nvgpu_gr_get_config_ptr(g);
@@ -128,6 +130,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		nvgpu_local_golden_image_get_fault_injection();
	int (*init_sm_id_table_tmp)(struct gk20a *g,
		struct nvgpu_gr_config *config);
+	struct nvgpu_tsg *tsg = (struct nvgpu_tsg *)
+		malloc(sizeof(struct nvgpu_tsg));

	/* Inject allocation failures and initialize obj_ctx, should fail */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
@@ -171,6 +175,8 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate memory");
	}

+	tsg->gr_ctx = gr_ctx;
+
	global_desc = nvgpu_gr_global_ctx_desc_alloc(g);
	if (!global_desc) {
		unit_return_fail(m, "failed to allocate desc");
@@ -195,10 +201,15 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
		unit_return_fail(m, "failed to allocate subcontext");
	}

+	mappings = nvgpu_gr_ctx_mappings_create(g, tsg, vm);
+	if (mappings == NULL) {
+		unit_return_fail(m, "failed to allocate gr_ctx mappings");
+	}
+
	/* Fail gr_ctx allocation */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 0);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -207,7 +218,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail patch_ctx allocation */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 3);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -216,7 +227,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail circular buffer mapping */
	nvgpu_posix_enable_fault_injection(kmem_fi, true, 8);
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -228,7 +239,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	g->ops.gr.init.fe_pwr_mode_force_on = test_fe_pwr_mode_force_on;
	fe_pwr_mode_count = 0;
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m, "unexpected success");
@@ -237,7 +248,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m,
	/* Fail second call to gops.gr.init.fe_pwr_mode_force_on */
	fe_pwr_mode_count = 1;
	err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc,
-			config, gr_ctx, subctx, vm, &inst_block,
+			config, gr_ctx, subctx, mappings, &inst_block,
			VOLTA_COMPUTE_A, 0, false, false);
	if (err == 0) {
		unit_return_fail(m,
"unexpected success"); @@ -252,7 +263,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.falcon.ctrl_ctxsw = test_falcon_ctrl_ctxsw; ctrl_ctxsw_count = -1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -265,7 +276,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.wait_idle = test_gr_wait_idle; gr_wait_idle_count = 2; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -278,7 +289,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.load_sw_bundle_init = test_load_sw_bundle; load_sw_bundle_count = 0; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -288,7 +299,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.load_sw_veid_bundle = test_load_sw_bundle; load_sw_bundle_count = 1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -308,7 +319,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, g->ops.gr.init.wait_idle = test_gr_wait_idle; gr_wait_idle_count = 4; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -323,7 +334,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, */ ctrl_ctxsw_count = 1; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -335,7 +346,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, */ ctrl_ctxsw_count = 2; err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -347,7 +358,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Fail golden context verification */ nvgpu_posix_enable_fault_injection(golden_ctx_verif_fi, true, 0); err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -358,7 +369,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Finally, successful obj_ctx allocation */ err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err != 0) { unit_return_fail(m, 
"failed to allocate obj_ctx"); @@ -371,14 +382,14 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Reallocation with golden image already created */ err = nvgpu_gr_obj_ctx_alloc(g, golden_image, global_desc, desc, - config, gr_ctx, subctx, vm, &inst_block, + config, gr_ctx, subctx, mappings, &inst_block, VOLTA_COMPUTE_A, 0, false, false); if (err != 0) { unit_return_fail(m, "failed to re-allocate obj_ctx"); } /* Set preemption mode with invalid compute class */ - err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx, vm, + err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config, desc, gr_ctx, VOLTA_DMA_COPY_A, 0, NVGPU_PREEMPTION_MODE_COMPUTE_CTA); if (err == 0) { unit_return_fail(m, "unexpected success"); @@ -386,8 +397,7 @@ int test_gr_obj_ctx_error_injection(struct unit_module *m, /* Cleanup */ nvgpu_gr_subctx_free(g, subctx, vm); - nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx); - nvgpu_gr_ctx_free(g, gr_ctx, global_desc, vm); + nvgpu_gr_ctx_free(g, gr_ctx, global_desc); nvgpu_free_gr_ctx_struct(g, gr_ctx); nvgpu_gr_ctx_desc_free(g, desc); nvgpu_gr_obj_ctx_deinit(g, golden_image); diff --git a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h index 7371c7814..10fc4e272 100644 --- a/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h +++ b/userspace/units/gr/obj_ctx/nvgpu-gr-obj-ctx.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -53,7 +53,6 @@ struct unit_module; * nvgpu_gr_subctx_free, * nvgpu_gr_obj_ctx_commit_inst, * nvgpu_gr_obj_ctx_commit_inst_gpu_va, - * nvgpu_gr_ctx_get_patch_ctx_mem, * nvgpu_gr_subctx_get_ctx_header, * nvgpu_gr_subctx_load_ctx_header, * nvgpu_gr_global_ctx_get_size, diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c index 992dde595..cee8cfdde 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.c +++ b/userspace/units/gr/setup/nvgpu-gr-setup.c @@ -584,7 +584,7 @@ static void gr_setup_fake_free_obj_ctx(struct unit_module *m, struct gk20a *g) g->ops.gr.setup.free_subctx(gr_setup_ch); nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS, true); - g->ops.gr.setup.free_gr_ctx(g, 0, 0); + g->ops.gr.setup.free_gr_ctx(g, NULL); gr_setup_ch->subctx = gr_subctx; } diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.h b/userspace/units/gr/setup/nvgpu-gr-setup.h index a85deb98a..51569f787 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.h +++ b/userspace/units/gr/setup/nvgpu-gr-setup.h @@ -55,7 +55,7 @@ struct unit_module; * nvgpu_gr_ctx_get_ctx_mem, * nvgpu_gr_ctx_set_tsgid, * nvgpu_gr_ctx_get_tsgid, - * nvgpu_gr_ctx_get_global_ctx_va, + * nvgpu_gr_ctx_mappings_get_global_ctx_va, * gops_gr_setup.alloc_obj_ctx, * nvgpu_gr_ctx_load_golden_ctx_image, * gm20b_ctxsw_prog_set_patch_addr,