gpu: nvgpu: Add subctx programming for MIG

This CL covers the following code changes:
1) Added an API to init the inst_block for more than one subctx.
2) Added logic to limit the subctx bind based on the
   max. VEID count allocated to a GR instance.
3) Renamed nvgpu_grmgr_get_gr_runlist_id to
   nvgpu_grmgr_get_gpu_instance_runlist_id.

JIRA NVGPU-5647

Change-Id: Ifec8164a9e5f46fbd0538c3dd50e19ee63667a54
Signed-off-by: Lakshmanan M <lm@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2418463
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-by: Dinesh T <dt@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Lakshmanan M authored on 2020-09-23 15:42:07 +05:30,
committed by Alex Waterman
parent d2bb5df3c7
commit c0e2dc5b74
23 changed files with 133 additions and 31 deletions

View File

@@ -141,9 +141,20 @@ void nvgpu_channel_commit_va(struct nvgpu_channel *c)
nvgpu_log_fn(g, " ");
if (g->ops.mm.init_inst_block_for_subctxs != NULL) {
u32 subctx_count = nvgpu_channel_get_max_subctx_count(c);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig,
"chid: %d max_subctx_count[%u] ",
c->chid, subctx_count);
g->ops.mm.init_inst_block_for_subctxs(&c->inst_block, c->vm,
c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG],
subctx_count);
} else {
g->ops.mm.init_inst_block(&c->inst_block, c->vm,
c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]);
}
}
int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add)
{
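
The hunk above keeps the legacy init_inst_block path as a fallback and only
takes the new subctx-aware path when that HAL is wired. Below is a minimal
standalone model of this dispatch pattern; the struct and function names are
mock stand-ins (not nvgpu APIs), and it only assumes that the subctx-aware
hook may be left NULL on configs without subcontext support.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct mock_inst_block {
        uint32_t big_page_size;
        uint32_t subctx_count;
};

struct mock_mm_ops {
        void (*init_inst_block)(struct mock_inst_block *ib, uint32_t big_page_size);
        /* Optional: only wired on chips/configs with subcontext support. */
        void (*init_inst_block_for_subctxs)(struct mock_inst_block *ib,
                        uint32_t big_page_size, uint32_t max_subctx_count);
};

static void legacy_init(struct mock_inst_block *ib, uint32_t big_page_size)
{
        ib->big_page_size = big_page_size;
        ib->subctx_count = 1U; /* legacy path programs a single subctx */
}

static void subctx_init(struct mock_inst_block *ib, uint32_t big_page_size,
                uint32_t max_subctx_count)
{
        ib->big_page_size = big_page_size;
        ib->subctx_count = max_subctx_count;
}

static void commit_va_model(const struct mock_mm_ops *ops,
                struct mock_inst_block *ib, uint32_t big_page_size,
                uint32_t max_subctx_count)
{
        if (ops->init_inst_block_for_subctxs != NULL) {
                ops->init_inst_block_for_subctxs(ib, big_page_size,
                                max_subctx_count);
        } else {
                ops->init_inst_block(ib, big_page_size);
        }
}

int main(void)
{
        struct mock_mm_ops ops = { legacy_init, subctx_init };
        struct mock_inst_block ib = { 0U, 0U };

        /* 48 stands in for a GR instance's VEID allocation. */
        commit_va_model(&ops, &ib, 64U * 1024U, 48U);
        printf("subctx_count programmed: %u\n", ib.subctx_count);
        return 0;
}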

View File

@@ -390,7 +390,8 @@ bool nvgpu_grmgr_is_valid_runlist_id(struct gk20a *g,
return false;
}
u32 nvgpu_grmgr_get_gr_runlist_id(struct gk20a *g, u32 gpu_instance_id)
u32 nvgpu_grmgr_get_gpu_instance_runlist_id(struct gk20a *g,
u32 gpu_instance_id)
{
if (gpu_instance_id < g->mig.num_gpu_instances) {
struct nvgpu_gpu_instance *gpu_instance =

View File

@@ -53,7 +53,8 @@ int gv11b_ramfc_setup(struct nvgpu_channel *ch, u64 gpfifo_base,
nvgpu_log_info(g, "%llu %u", pbdma_acquire_timeout,
g->ops.pbdma.acquire_val(pbdma_acquire_timeout));
g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem, replayable);
g->ops.ramin.init_subctx_pdb(g, mem, ch->vm->pdb.mem,
replayable, nvgpu_channel_get_max_subctx_count(ch));
nvgpu_mem_wr32(g, mem, ram_fc_gp_base_w(),
g->ops.pbdma.get_gp_base(gpfifo_base));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -32,7 +32,7 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 gpu_va);
void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable);
bool replayable, u32 max_subctx_count);
void gv11b_ramin_set_eng_method_buffer(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 gpu_va);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -46,27 +46,45 @@ void gv11b_ramin_set_gr_ptr(struct gk20a *g,
}
static void gv11b_subctx_commit_valid_mask(struct gk20a *g,
struct nvgpu_mem *inst_block)
struct nvgpu_mem *inst_block, u32 max_subctx_count)
{
u32 id;
u32 subctx_count = max_subctx_count;
for (id = 0U; id < max_subctx_count; id += 32U) {
u32 subctx_mask_max_bit = ((subctx_count < 32U) ?
(subctx_count % 32U) : 0U);
u32 subctx_mask = U32_MAX;
if (subctx_mask_max_bit != 0U) {
subctx_mask = nvgpu_safe_sub_u32(
BIT32(subctx_mask_max_bit), 1U);
}
/* Make all subctx pdbs valid */
for (id = 0U; id < ram_in_sc_pdb_valid__size_1_v(); id += 32U) {
nvgpu_mem_wr32(g, inst_block,
ram_in_sc_pdb_valid_long_w(id), U32_MAX);
ram_in_sc_pdb_valid_long_w(id), subctx_mask);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_mig,
"id[%d] max_subctx_count[%u] subctx_mask_max_bit[%u] "
"subctx_count[%u] subctx_mask[%x] ",
id, max_subctx_count, subctx_mask_max_bit,
subctx_count, subctx_mask);
if (subctx_count > 32U) {
subctx_count = nvgpu_safe_sub_u32(subctx_count, 32U);
}
}
}
static void gv11b_subctx_commit_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable)
bool replayable, u32 max_subctx_count)
{
u32 lo, hi;
u32 subctx_id = 0;
u32 format_word;
u32 pdb_addr_lo, pdb_addr_hi;
u64 pdb_addr;
u32 max_subctx_count = ram_in_sc_page_dir_base_target__size_1_v();
u32 aperture = nvgpu_aperture_mask(g, pdb_mem,
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
@@ -100,10 +118,11 @@ static void gv11b_subctx_commit_pdb(struct gk20a *g,
void gv11b_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable)
bool replayable, u32 max_subctx_count)
{
gv11b_subctx_commit_pdb(g, inst_block, pdb_mem, replayable);
gv11b_subctx_commit_valid_mask(g, inst_block);
gv11b_subctx_commit_pdb(g, inst_block, pdb_mem, replayable,
max_subctx_count);
gv11b_subctx_commit_valid_mask(g, inst_block, max_subctx_count);
}
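
The new gv11b_subctx_commit_valid_mask() above limits the subctx PDB valid
bits to max_subctx_count instead of unconditionally writing U32_MAX into every
32-bit valid word. The standalone sketch below models only that mask
arithmetic (register names and instance-block offsets omitted); e.g. 40 VEIDs
yields 0xFFFFFFFF for the first word and 0x000000FF for the second.

#include <stdio.h>
#include <stdint.h>

static void commit_valid_mask_model(uint32_t max_subctx_count)
{
        uint32_t remaining = max_subctx_count;
        uint32_t id;

        for (id = 0U; id < max_subctx_count; id += 32U) {
                /* Bits to set in this word: all 32, or only the remainder. */
                uint32_t max_bit = (remaining < 32U) ? (remaining % 32U) : 0U;
                uint32_t mask = 0xFFFFFFFFU;

                if (max_bit != 0U) {
                        mask = (1U << max_bit) - 1U;
                }

                printf("valid word %u: 0x%08x\n", id / 32U, mask);

                if (remaining > 32U) {
                        remaining -= 32U;
                }
        }
}

int main(void)
{
        commit_valid_mask_model(64U); /* full gv11b VEID count */
        commit_valid_mask_model(40U); /* e.g. a smaller MIG GR instance */
        return 0;
}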

View File

@@ -1053,6 +1053,7 @@ static const struct gops_mm gv11b_ops_mm = {
.setup_hw = nvgpu_mm_setup_hw,
.is_bar1_supported = gv11b_mm_is_bar1_supported,
.init_inst_block = gv11b_mm_init_inst_block,
.init_inst_block_for_subctxs = gv11b_mm_init_inst_block_for_subctxs,
.init_bar2_vm = gp10b_mm_init_bar2_vm,
.remove_bar2_vm = gp10b_mm_remove_bar2_vm,
.bar1_map_userd = NULL,

View File

@@ -1104,6 +1104,7 @@ static const struct gops_mm tu104_ops_mm = {
.setup_hw = nvgpu_mm_setup_hw,
.is_bar1_supported = gv11b_mm_is_bar1_supported,
.init_inst_block = gv11b_mm_init_inst_block,
.init_inst_block_for_subctxs = gv11b_mm_init_inst_block_for_subctxs,
.init_bar2_vm = gp10b_mm_init_bar2_vm,
.remove_bar2_vm = gp10b_mm_remove_bar2_vm,
.get_flush_retries = tu104_mm_get_flush_retries,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,6 +30,8 @@ struct vm_gk20a;
void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
u32 big_page_size);
void gv11b_mm_init_inst_block_for_subctxs(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size, u32 max_subctx_count);
bool gv11b_mm_is_bar1_supported(struct gk20a *g);
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -42,7 +42,30 @@ void gv11b_mm_init_inst_block(struct nvgpu_mem *inst_block,
}
if (g->ops.ramin.init_subctx_pdb != NULL) {
g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false);
g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false,
1U);
}
}
void gv11b_mm_init_inst_block_for_subctxs(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size, u32 max_subctx_count)
{
struct gk20a *g = gk20a_from_vm(vm);
u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb);
nvgpu_log_info(g, "inst block phys = 0x%llx, kv = 0x%p",
nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
g->ops.ramin.init_pdb(g, inst_block, pdb_addr, vm->pdb.mem);
if ((big_page_size != 0U) &&
(g->ops.ramin.set_big_page_size != NULL)) {
g->ops.ramin.set_big_page_size(g, inst_block, big_page_size);
}
if (g->ops.ramin.init_subctx_pdb != NULL) {
g->ops.ramin.init_subctx_pdb(g, inst_block, vm->pdb.mem, false,
max_subctx_count);
}
}

View File

@@ -756,6 +756,7 @@ static const struct gops_mm vgpu_gv11b_ops_mm = {
.setup_hw = NULL,
.is_bar1_supported = gv11b_mm_is_bar1_supported,
.init_inst_block = gv11b_mm_init_inst_block,
.init_inst_block_for_subctxs = gv11b_mm_init_inst_block_for_subctxs,
.init_bar2_vm = gp10b_mm_init_bar2_vm,
.remove_bar2_vm = gp10b_mm_remove_bar2_vm,
.bar1_map_userd = vgpu_mm_bar1_map_userd,

View File

@@ -1178,4 +1178,11 @@ static inline void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch,
bool dump) {}
#endif
/**
* @brief Get maximum sub context count.
*
* @param ch [in] Channel pointer.
*/
u32 nvgpu_channel_get_max_subctx_count(struct nvgpu_channel *ch);
#endif

View File

@@ -540,6 +540,23 @@ struct gops_mm {
void (*init_inst_block)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size);
/**
* @brief HAL to initialize the instance block memory.
* (for more than one subctx)
*
* @param inst_block [in] Pointer to instance block memory.
* @param vm [in] Pointer to virtual memory context.
* @param big_page_size [in] Big page size supported by GMMU.
* @param max_subctx_count [in] Max number of sub context.
*
* Initializes the instance block memory:
* - Configures the pdb base, big page size and
* sub context's pdb base in context's instance block memory.
*/
void (*init_inst_block_for_subctxs)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size,
u32 max_subctx_count);
/**
* @brief HAL to get the maximum flush retry counts.
*
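
The doc comment added above says what init_inst_block_for_subctxs programs:
the PDB base, the big page size and one PDB base per sub context, bounded by
max_subctx_count. The standalone mock below sketches that programming; every
word offset and field layout in it is invented for illustration (the real
layout comes from the ram_in_* HW headers), and all sub contexts point at the
VM's single shared PDB, as in the gv11b code earlier in this change.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MOCK_IB_WORDS           256U
#define MOCK_PDB_LO_W           0U   /* invented offsets, not ram_in_* */
#define MOCK_PDB_HI_W           1U
#define MOCK_BIG_PAGE_SIZE_W    2U
#define MOCK_SC_PDB_BASE_W      16U  /* two words (lo/hi) per sub context */

static void mock_init_inst_block_for_subctxs(uint32_t *ib, uint64_t pdb_addr,
                uint32_t big_page_size, uint32_t max_subctx_count)
{
        uint32_t id;

        ib[MOCK_PDB_LO_W] = (uint32_t)(pdb_addr & 0xFFFFFFFFULL);
        ib[MOCK_PDB_HI_W] = (uint32_t)(pdb_addr >> 32);
        ib[MOCK_BIG_PAGE_SIZE_W] = big_page_size;

        /* Only the sub contexts within the VEID budget get a PDB entry. */
        for (id = 0U; id < max_subctx_count; id++) {
                ib[MOCK_SC_PDB_BASE_W + (2U * id)] =
                        (uint32_t)(pdb_addr & 0xFFFFFFFFULL);
                ib[MOCK_SC_PDB_BASE_W + (2U * id) + 1U] =
                        (uint32_t)(pdb_addr >> 32);
        }
}

int main(void)
{
        uint32_t ib[MOCK_IB_WORDS];

        memset(ib, 0, sizeof(ib));
        mock_init_inst_block_for_subctxs(ib, 0x1234ULL << 12, 64U * 1024U, 48U);
        printf("subctx 47 pdb lo: 0x%08x\n", ib[MOCK_SC_PDB_BASE_W + 94U]);
        return 0;
}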

View File

@@ -98,6 +98,7 @@ struct gops_ramin {
* @param pdb_mem [in] Memory descriptor of PDB.
* @param replayable [in] Indicates if errors are replayable
* for this Instance Block.
* @param max_subctx_count [in] Max number of sub context.
*
* This HAL configures PDB for all sub-contexts of Instance Block:
* - Get max number of sub-contexts from HW.
@@ -116,7 +117,7 @@ struct gops_ramin {
void (*init_subctx_pdb)(struct gk20a *g,
struct nvgpu_mem *inst_block,
struct nvgpu_mem *pdb_mem,
bool replayable);
bool replayable, u32 max_subctx_count);
/**
* @brief Instance Block shift.

View File

@@ -42,7 +42,8 @@ u32 nvgpu_grmgr_get_gr_gpc_phys_id(struct gk20a *g, u32 gr_instance_id,
u32 nvgpu_grmgr_get_gr_instance_id(struct gk20a *g, u32 gpu_instance_id);
bool nvgpu_grmgr_is_valid_runlist_id(struct gk20a *g,
u32 gpu_instance_id, u32 runlist_id);
u32 nvgpu_grmgr_get_gr_runlist_id(struct gk20a *g, u32 gpu_instance_id);
u32 nvgpu_grmgr_get_gpu_instance_runlist_id(struct gk20a *g,
u32 gpu_instance_id);
u32 nvgpu_grmgr_get_gr_instance_id_for_syspipe(struct gk20a *g,
u32 gr_syspipe_id);
u32 nvgpu_grmgr_get_gpu_instance_max_veid_count(struct gk20a *g,

View File

@@ -22,6 +22,7 @@
#include <nvgpu/channel.h>
#include <nvgpu/dma.h>
#include <nvgpu/fence.h>
#include <nvgpu/grmgr.h>
/*
* This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -628,6 +629,14 @@ u32 nvgpu_get_gpfifo_entry_size(void)
return sizeof(struct nvgpu_gpfifo_entry);
}
u32 nvgpu_channel_get_max_subctx_count(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
return nvgpu_grmgr_get_gpu_instance_max_veid_count(g,
0U);
}
#ifdef CONFIG_DEBUG_FS
static void trace_write_pushbuffer(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *g)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,3 +30,8 @@ u32 nvgpu_get_gpfifo_entry_size(void)
*/
return 8;
}
u32 nvgpu_channel_get_max_subctx_count(struct nvgpu_channel *ch)
{
return 64;
}

View File

@@ -402,6 +402,7 @@ nvgpu_free_fixed
nvgpu_free_gr_ctx_struct
nvgpu_get
nvgpu_get_gpfifo_entry_size
nvgpu_channel_get_max_subctx_count
nvgpu_get_pte
nvgpu_gmmu_default_big_page_size
nvgpu_gmmu_init_page_table

View File

@@ -416,6 +416,7 @@ nvgpu_free_fixed
nvgpu_free_gr_ctx_struct
nvgpu_get
nvgpu_get_gpfifo_entry_size
nvgpu_channel_get_max_subctx_count
nvgpu_get_pte
nvgpu_gmmu_default_big_page_size
nvgpu_gmmu_init_page_table

View File

@@ -501,7 +501,7 @@ test_channel_suspend_resume_serviceable_chs.suspend_resume=0
test_channel_sw_quiesce.sw_quiesce=0
test_fifo_init_support.init_support=0
test_fifo_remove_support.remove_support=0
test_nvgpu_channel_commit_va.channel_commit_va=0
test_nvgpu_channel_commit_va.channel_commit_va=2
test_nvgpu_get_gpfifo_entry_size.get_gpfifo_entry_size=0
test_trace_write_pushbuffers.trace_write_pushbuffers=0
@@ -1041,7 +1041,7 @@ test_gm20b_ramin_set_big_page_size.set_big_page_size=0
test_gp10b_ramin_init_pdb.init_pdb=0
[ramin_gv11b_fusa]
test_gv11b_ramin_init_subctx_pdb.init_subctx_pdb=0
test_gv11b_ramin_init_subctx_pdb.init_subctx_pdb=2
test_gv11b_ramin_set_eng_method_buffer.set_eng_method_buf=0
test_gv11b_ramin_set_gr_ptr.set_gr_ptr=0
@@ -1083,7 +1083,7 @@ test_map_buffer_error_cases.map_buffer_error_cases=0
test_nvgpu_vm_alloc_va.nvgpu_vm_alloc_va=0
test_vm_area_error_cases.vm_area_error_cases=0
test_vm_aspace_id.vm_aspace_id=0
test_vm_bind.vm_bind=0
test_vm_bind.vm_bind=2
test_gk20a_from_vm.gk20a_from_vm=0
test_vm_pde_coverage_bit_count.vm_pde_coverage_bit_count=0
test_nvgpu_insert_mapped_buf.nvgpu_insert_mapped_buf=0

View File

@@ -1979,7 +1979,7 @@ struct unit_module_test nvgpu_channel_tests[] = {
UNIT_TEST(channel_put_warn, test_channel_put_warn, &unit_ctx, 0),
UNIT_TEST(referenceable_cleanup, test_ch_referenceable_cleanup, &unit_ctx, 0),
UNIT_TEST(abort_cleanup, test_channel_abort_cleanup, &unit_ctx, 0),
UNIT_TEST(channel_commit_va, test_nvgpu_channel_commit_va, &unit_ctx, 0),
UNIT_TEST(channel_commit_va, test_nvgpu_channel_commit_va, &unit_ctx, 2),
UNIT_TEST(get_gpfifo_entry_size, test_nvgpu_get_gpfifo_entry_size, &unit_ctx, 0),
UNIT_TEST(trace_write_pushbuffers, test_trace_write_pushbuffers, &unit_ctx, 0),
UNIT_TEST(remove_support, test_fifo_remove_support, &unit_ctx, 0),

View File

@@ -133,7 +133,7 @@ static int stub_ramfc_commit_userd(struct nvgpu_channel *ch)
static void stub_ramin_init_subctx_pdb(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_mem *pdb_mem,
bool replayable)
bool replayable, u32 max_subctx_count)
{
global_count++;
}

View File

@@ -147,7 +147,7 @@ int test_gv11b_ramin_init_subctx_pdb(struct unit_module *m, struct gk20a *g,
}
gv11b_ramin_init_subctx_pdb(g, &inst_block, &pdb_mem,
replayable);
replayable, 64);
for (subctx_id = 0; subctx_id < max_subctx_count; subctx_id++) {
addr_lo = ram_in_sc_page_dir_base_vol_w(subctx_id);
@@ -215,7 +215,7 @@ done:
struct unit_module_test ramin_gv11b_fusa_tests[] = {
UNIT_TEST(set_gr_ptr, test_gv11b_ramin_set_gr_ptr, NULL, 0),
UNIT_TEST(init_subctx_pdb, test_gv11b_ramin_init_subctx_pdb, NULL, 0),
UNIT_TEST(init_subctx_pdb, test_gv11b_ramin_init_subctx_pdb, NULL, 2),
UNIT_TEST(set_eng_method_buf, test_gv11b_ramin_set_eng_method_buffer, NULL, 0),
};

View File

@@ -2076,7 +2076,7 @@ struct unit_module_test vm_tests[] = {
UNIT_TEST(init_error_paths, test_init_error_paths, NULL, 0),
UNIT_TEST(map_buffer_error_cases, test_map_buffer_error_cases, NULL, 0),
UNIT_TEST(nvgpu_vm_alloc_va, test_nvgpu_vm_alloc_va, NULL, 0),
UNIT_TEST(vm_bind, test_vm_bind, NULL, 0),
UNIT_TEST(vm_bind, test_vm_bind, NULL, 2),
UNIT_TEST(vm_aspace_id, test_vm_aspace_id, NULL, 0),
UNIT_TEST(vm_area_error_cases, test_vm_area_error_cases, NULL, 0),
UNIT_TEST_REQ("NVGPU-RQCD-45.C2",