gpu: nvgpu: map global_ctx buffers from gr/ctx unit

Currently all the global context buffers are mapped into each graphics
context. Move all the mapping/unmapping support to the gr/ctx unit,
since the mappings are owned by the context itself.

Add nvgpu_gr_ctx_map_global_ctx_buffers(), which maps all the global
context buffers into a given gr_ctx.
Add nvgpu_gr_ctx_get_global_ctx_va(), which returns the VA of the
mapping for the requested index.
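
For reference, a minimal sketch of the new call pattern, mirroring the
gk20a_alloc_obj_ctx() and commit-time call sites touched by this change
(error handling trimmed; local variables assumed from those call sites):

	/* Map all global context buffers (owned by the per-GPU gr unit)
	 * into the TSG's address space for this graphics context. */
	err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
			g->gr.global_ctx_buffer, tsg->vm, c->vpr);
	if (err != 0) {
		nvgpu_err(g, "fail to map global ctx buffer");
		return err;
	}

	/* Look up the GPU VA of one of the mappings, e.g. the pagepool. */
	addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
			NVGPU_GR_CTX_PAGEPOOL_VA);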

Remove g->ops.gr.map_global_ctx_buffers() since it is no longer
required. Also remove the following APIs:
gr_gk20a_map_global_ctx_buffers()
gr_gk20a_unmap_global_ctx_buffers()
gr_tu104_map_global_ctx_buffers()

Remove global_ctx_buffer_size from nvgpu_gr_ctx since it is no
longer used.

Jira NVGPU-1527

Change-Id: Ic185c03757706171db0f5a925e13a118ebbdeb48
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1987739
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Deepak Nibade
Date: 2019-01-04 20:08:50 +05:30
Committed by: mobile promotions
Parent: 1c17ae310c
Commit: 4883f14fbb
17 changed files with 216 additions and 222 deletions


@@ -21,10 +21,16 @@
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/vm.h>
#include <nvgpu/gmmu.h>
static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm);
struct nvgpu_gr_ctx_desc *
nvgpu_gr_ctx_desc_alloc(struct gk20a *g)
{
@@ -82,11 +88,16 @@ err_free_mem:
}
void nvgpu_gr_ctx_free(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm)
{
nvgpu_log_fn(g, " ");
if (gr_ctx != NULL) {
nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx,
global_ctx_buffer, vm);
nvgpu_gr_ctx_free_pm_ctx(g, vm, gr_ctx);
nvgpu_gr_ctx_free_patch_ctx(g, vm, gr_ctx);
@@ -291,3 +302,154 @@ fail_free_preempt:
fail:
return err;
}
static void nvgpu_gr_ctx_unmap_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm)
{
u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
u32 i;
nvgpu_log_fn(g, " ");
for (i = 0; i < NVGPU_GR_CTX_VA_COUNT; i++) {
nvgpu_gr_global_ctx_buffer_unmap(global_ctx_buffer,
g_bfr_index[i], vm, g_bfr_va[i]);
}
(void) memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
(void) memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
gr_ctx->global_ctx_buffer_mapped = false;
}
int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm, bool vpr)
{
u64 *g_bfr_va;
int *g_bfr_index;
u64 gpu_va = 0ULL;
nvgpu_log_fn(g, " ");
g_bfr_va = gr_ctx->global_ctx_buffer_va;
g_bfr_index = gr_ctx->global_ctx_buffer_index;
/* Circular Buffer */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[NVGPU_GR_CTX_CIRCULAR_VA] = NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[NVGPU_GR_CTX_CIRCULAR_VA] = NVGPU_GR_GLOBAL_CTX_CIRCULAR;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA] = gpu_va;
/* Attribute Buffer */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, false);
g_bfr_index[NVGPU_GR_CTX_ATTRIBUTE_VA] = NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE,
vm, NVGPU_VM_MAP_CACHEABLE, false);
g_bfr_index[NVGPU_GR_CTX_ATTRIBUTE_VA] = NVGPU_GR_GLOBAL_CTX_ATTRIBUTE;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA] = gpu_va;
/* Page Pool */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[NVGPU_GR_CTX_PAGEPOOL_VA] = NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[NVGPU_GR_CTX_PAGEPOOL_VA] = NVGPU_GR_GLOBAL_CTX_PAGEPOOL;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA] = gpu_va;
/* Priv register Access Map */
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP,
vm, 0, true);
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA] = gpu_va;
g_bfr_index[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA] = NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP;
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* FECS trace buffer */
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER,
vm, 0, true);
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA] = gpu_va;
g_bfr_index[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA] =
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER;
}
#endif
/* RTV circular buffer */
if (nvgpu_gr_global_ctx_buffer_ready(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
vm, 0, true);
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA] = gpu_va;
g_bfr_index[NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA] =
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER;
}
gr_ctx->global_ctx_buffer_mapped = true;
return 0;
clean_up:
nvgpu_gr_ctx_unmap_global_ctx_buffers(g, gr_ctx, global_ctx_buffer, vm);
return -ENOMEM;
}
u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx,
enum nvgpu_gr_ctx_global_ctx_va index)
{
return gr_ctx->global_ctx_buffer_va[index];
}


@@ -466,7 +466,8 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
}
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
addr = gr_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA);
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
aperture_mask = 0;
} else {


@@ -781,7 +781,8 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
}
/* global pagepool buffer */
addr = gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA] >>
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_PAGEPOOL_VA) >>
U64(gr_scc_pagepool_base_addr_39_8_align_bits_v());
size = nvgpu_gr_global_ctx_get_size(gr->global_ctx_buffer,
@@ -798,7 +799,8 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
/* global bundle cb */
addr = gr_ctx->global_ctx_buffer_va[CIRCULAR_VA] >>
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_CIRCULAR_VA) >>
U64(gr_scc_bundle_cb_base_addr_39_8_align_bits_v());
size = gr->bundle_cb_default_size;
@@ -809,7 +811,8 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
/* global attrib cb */
addr = gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA] >>
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_ATTRIBUTE_VA) >>
U64(gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v());
nvgpu_log_info(g, "attrib cb addr : 0x%016llx", addr);
@@ -1700,7 +1703,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
g->allow_all);
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem,
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA));
/* disable verif features */
g->ops.gr.ctxsw_prog.disable_verif_features(g, mem);
@@ -2329,133 +2333,6 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
return 0;
}
static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx)
{
u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
u32 i;
nvgpu_log_fn(g, " ");
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
nvgpu_gr_global_ctx_buffer_unmap(g->gr.global_ctx_buffer,
g_bfr_index[i], vm, g_bfr_va[i]);
}
(void) memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
(void) memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
gr_ctx->global_ctx_buffer_mapped = false;
}
int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx, bool vpr)
{
u64 *g_bfr_va;
int *g_bfr_index;
struct gr_gk20a *gr = &g->gr;
u64 gpu_va = 0ULL;
nvgpu_log_fn(g, " ");
g_bfr_va = gr_ctx->global_ctx_buffer_va;
g_bfr_index = gr_ctx->global_ctx_buffer_index;
/* Circular Buffer */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[CIRCULAR_VA] = NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_CIRCULAR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[CIRCULAR_VA] = NVGPU_GR_GLOBAL_CTX_CIRCULAR;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[CIRCULAR_VA] = gpu_va;
/* Attribute Buffer */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, false);
g_bfr_index[ATTRIBUTE_VA] = NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE,
vm, NVGPU_VM_MAP_CACHEABLE, false);
g_bfr_index[ATTRIBUTE_VA] = NVGPU_GR_GLOBAL_CTX_ATTRIBUTE;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[ATTRIBUTE_VA] = gpu_va;
/* Page Pool */
if (vpr && nvgpu_gr_global_ctx_buffer_ready(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[PAGEPOOL_VA] = NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR;
} else {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL,
vm, NVGPU_VM_MAP_CACHEABLE, true);
g_bfr_index[PAGEPOOL_VA] = NVGPU_GR_GLOBAL_CTX_PAGEPOOL;
}
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[PAGEPOOL_VA] = gpu_va;
/* Priv register Access Map */
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP,
vm, 0, true);
if (gpu_va == 0ULL) {
goto clean_up;
}
g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
g_bfr_index[PRIV_ACCESS_MAP_VA] = NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP;
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* FECS trace buffer */
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER,
vm, 0, true);
if (!gpu_va)
goto clean_up;
g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
g_bfr_index[FECS_TRACE_BUFFER_VA] =
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER;
}
#endif
gr_ctx->global_ctx_buffer_mapped = true;
return 0;
clean_up:
gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
return -ENOMEM;
}
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
{
@@ -2486,8 +2363,7 @@ void gr_gk20a_free_gr_ctx(struct gk20a *g,
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem);
}
gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
nvgpu_gr_ctx_free(g, vm, gr_ctx);
nvgpu_gr_ctx_free(g, gr_ctx, g->gr.global_ctx_buffer, vm);
}
}
@@ -2576,8 +2452,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
class_num, flags);
/* map global buffer to channel gpu_va and commit */
err = g->ops.gr.map_global_ctx_buffers(g, tsg->vm, gr_ctx,
c->vpr);
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
g->gr.global_ctx_buffer, tsg->vm, c->vpr);
if (err != 0) {
nvgpu_err(g,
"fail to map global ctx buffer");


@@ -694,8 +694,6 @@ int gk20a_gr_handle_notify_pending(struct gk20a *g,
struct gr_gk20a_isr_data *isr_data);
int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx, bool vpr);
int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);


@@ -328,7 +328,6 @@ static const struct gpu_ops gm20b_ops = {
.split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr,
.fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_offset_in_gpccs_segment =
gr_gk20a_get_offset_in_gpccs_segment,


@@ -366,7 +366,6 @@ static const struct gpu_ops gp10b_ops = {
.split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr,
.fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_offset_in_gpccs_segment =
gr_gk20a_get_offset_in_gpccs_segment,


@@ -489,7 +489,6 @@ static const struct gpu_ops gv100_ops = {
.split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr,
.fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
.get_offset_in_gpccs_segment =


@@ -445,7 +445,6 @@ static const struct gpu_ops gv11b_ops = {
.split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr,
.fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
.get_offset_in_gpccs_segment =


@@ -100,7 +100,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
{
struct nvgpu_mem *ctxheader = &c->ctx_header;
struct gk20a *g = c->g;
int ret = 0;
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
@@ -115,7 +114,8 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
/* set priv access map */
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA));
g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
gr_ctx->patch_ctx.mem.gpu_va);
@@ -129,7 +129,7 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
return ret;
return 0;
}
static void gv11b_subctx_commit_valid_mask(struct vm_gk20a *vm,


@@ -517,9 +517,6 @@ struct gpu_ops {
u32 (*fecs_ctxsw_mailbox_size)(void);
int (*init_sw_bundle64)(struct gk20a *g);
int (*alloc_global_ctx_buffers)(struct gk20a *g);
int (*map_global_ctx_buffers)(struct gk20a *g,
struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx, bool vpr);
int (*commit_global_ctx_buffers)(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);
u32 (*get_nonpes_aware_tpc)(struct gk20a *g, u32 gpc, u32 tpc);


@@ -41,15 +41,17 @@ enum nvgpu_gr_ctx_index {
NVGPU_GR_CTX_COUNT
};
/* either ATTRIBUTE or ATTRIBUTE_VPR maps to ATTRIBUTE_VA */
enum /*global_ctx_buffer_va */ {
CIRCULAR_VA = 0,
PAGEPOOL_VA = 1,
ATTRIBUTE_VA = 2,
PRIV_ACCESS_MAP_VA = 3,
RTV_CIRCULAR_BUFFER_VA = 4,
FECS_TRACE_BUFFER_VA = 5,
NR_GLOBAL_CTX_BUF_VA = 6
/*
* either ATTRIBUTE or ATTRIBUTE_VPR maps to NVGPU_GR_CTX_ATTRIBUTE_VA
*/
enum nvgpu_gr_ctx_global_ctx_va {
NVGPU_GR_CTX_CIRCULAR_VA = 0,
NVGPU_GR_CTX_PAGEPOOL_VA = 1,
NVGPU_GR_CTX_ATTRIBUTE_VA = 2,
NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA = 3,
NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA = 4,
NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA = 5,
NVGPU_GR_CTX_VA_COUNT = 6
};
struct patch_desc {
@@ -97,9 +99,8 @@ struct nvgpu_gr_ctx {
u64 virt_ctx;
#endif
u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
u64 global_ctx_buffer_va[NVGPU_GR_CTX_VA_COUNT];
int global_ctx_buffer_index[NVGPU_GR_CTX_VA_COUNT];
bool global_ctx_buffer_mapped;
u32 tsgid;
@@ -118,7 +119,9 @@ int nvgpu_gr_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm);
void nvgpu_gr_ctx_free(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm);
int nvgpu_gr_ctx_alloc_pm_ctx(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
@@ -143,4 +146,11 @@ int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct vm_gk20a *vm);
int nvgpu_gr_ctx_map_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct vm_gk20a *vm, bool vpr);
u64 nvgpu_gr_ctx_get_global_ctx_va(struct nvgpu_gr_ctx *gr_ctx,
enum nvgpu_gr_ctx_global_ctx_va index);
#endif /* NVGPU_INCLUDE_GR_CTX_H */


@@ -153,48 +153,6 @@ int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g)
return 0;
}
int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx, bool vpr)
{
int err;
u64 *g_bfr_va;
int *g_bfr_index;
struct gr_gk20a *gr = &g->gr;
u64 gpu_va;
nvgpu_log_fn(g, " ");
g_bfr_va = gr_ctx->global_ctx_buffer_va;
g_bfr_index = gr_ctx->global_ctx_buffer_index;
/* RTV circular buffer */
gpu_va = nvgpu_gr_global_ctx_buffer_map(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
vm, 0, true);
if (gpu_va == 0ULL) {
return -ENOMEM;
}
g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va;
g_bfr_index[RTV_CIRCULAR_BUFFER_VA] =
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER;
err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr);
if (err != 0) {
goto clean_up;
}
return 0;
clean_up:
nvgpu_err(g, "fail");
nvgpu_gr_global_ctx_buffer_unmap(gr->global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER,
vm, gpu_va);
return err;
}
static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u64 addr, u32 size, u32 gfxpAddSize, bool patch)
@@ -234,7 +192,8 @@ int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
}
/* RTV circular buffer */
addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >>
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA) >>
U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f());
size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +


@@ -64,8 +64,6 @@ void gr_tu10x_create_sysfs(struct gk20a *g);
void gr_tu10x_remove_sysfs(struct gk20a *g);
int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g);
int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx, bool vpr);
int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);


@@ -516,7 +516,6 @@ static const struct gpu_ops tu104_ops = {
.init_sw_bundle64 = gr_tu104_init_sw_bundle64,
.fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v,
.alloc_global_ctx_buffers = gr_tu104_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_tu104_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_tu104_commit_global_ctx_buffers,
.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
.get_offset_in_gpccs_segment =


@@ -226,7 +226,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
gr_gm20b_get_pmm_per_chiplet_offset,
.split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_offset_in_gpccs_segment =
gr_gk20a_get_offset_in_gpccs_segment,


@@ -208,7 +208,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
if (!gpu_va)
goto clean_up;
g_bfr_va[CIRCULAR_VA] = gpu_va;
g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA] = gpu_va;
/* Attribute Buffer */
gpu_va = nvgpu_vm_alloc_va(ch_vm,
@@ -218,7 +218,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
if (!gpu_va)
goto clean_up;
g_bfr_va[ATTRIBUTE_VA] = gpu_va;
g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA] = gpu_va;
/* Page Pool */
gpu_va = nvgpu_vm_alloc_va(ch_vm,
@@ -227,7 +227,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
GMMU_PAGE_SIZE_KERNEL);
if (!gpu_va)
goto clean_up;
g_bfr_va[PAGEPOOL_VA] = gpu_va;
g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA] = gpu_va;
/* Priv register Access Map */
gpu_va = nvgpu_vm_alloc_va(ch_vm,
@@ -236,7 +236,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
GMMU_PAGE_SIZE_KERNEL);
if (!gpu_va)
goto clean_up;
g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
g_bfr_va[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA] = gpu_va;
/* FECS trace Buffer */
#ifdef CONFIG_GK20A_CTXSW_TRACE
@@ -248,17 +248,17 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
if (!gpu_va)
goto clean_up;
g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
g_bfr_va[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA] = gpu_va;
#endif
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_MAP_GR_GLOBAL_CTX;
msg.handle = vgpu_get_handle(g);
p->handle = c->virt_ctx;
p->cb_va = g_bfr_va[CIRCULAR_VA];
p->attr_va = g_bfr_va[ATTRIBUTE_VA];
p->page_pool_va = g_bfr_va[PAGEPOOL_VA];
p->priv_access_map_va = g_bfr_va[PRIV_ACCESS_MAP_VA];
p->cb_va = g_bfr_va[NVGPU_GR_CTX_CIRCULAR_VA];
p->attr_va = g_bfr_va[NVGPU_GR_CTX_ATTRIBUTE_VA];
p->page_pool_va = g_bfr_va[NVGPU_GR_CTX_PAGEPOOL_VA];
p->priv_access_map_va = g_bfr_va[NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA];
#ifdef CONFIG_GK20A_CTXSW_TRACE
p->fecs_trace_va = g_bfr_va[FECS_TRACE_BUFFER_VA];
p->fecs_trace_va = g_bfr_va[NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA];
#endif
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret)
@@ -268,7 +268,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
return 0;
clean_up:
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
for (i = 0; i < NVGPU_GR_CTX_VA_COUNT; i++) {
if (g_bfr_va[i]) {
nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
GMMU_PAGE_SIZE_KERNEL);
@@ -290,7 +290,7 @@ static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg)
if (tsg->gr_ctx->global_ctx_buffer_mapped) {
/* server will unmap on channel close */
for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
for (i = 0; i < NVGPU_GR_CTX_VA_COUNT; i++) {
if (g_bfr_va[i]) {
nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
GMMU_PAGE_SIZE_KERNEL);


@@ -260,7 +260,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
gr_gv11b_get_pmm_per_chiplet_offset,
.split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr,
.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
.get_offset_in_gpccs_segment =