gpu: nvgpu: move init_sm_id_table hal to hal.gr.config

Move the init_sm_id_table hal to common.hal.gr.config. Two separate
hals are added, one for gm20b and one for gv100.
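
Below is a minimal sketch of the resulting wiring. It assumes heavily
simplified gpu_ops/gk20a structures (the real nvgpu structs carry many
more members), and example_init_fs_state() is a hypothetical stand-in
that only mirrors the updated nvgpu_gr_init_fs_state() call site:

#include <stddef.h>

struct gk20a;

struct gpu_ops {
	struct {
		struct {
			/* moved here from the top-level gr ops */
			int (*init_sm_id_table)(struct gk20a *g);
		} config;
	} gr;
};

struct gk20a {
	struct gpu_ops ops;
};

static int example_init_fs_state(struct gk20a *g)
{
	/* callers now reach the hal through gr.config instead of gr */
	if (g->ops.gr.config.init_sm_id_table != NULL) {
		return g->ops.gr.config.init_sm_id_table(g);
	}
	return 0;
}

Each chip then plugs its own implementation into this pointer:
gm20b_gr_config_init_sm_id_table for gm20b/gp10b,
gv100_gr_config_init_sm_id_table for gv100/gv11b/tu104, and
vgpu_gr_init_sm_id_table for the virtualized gp10b/gv11b configs.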

JIRA NVGPU-1884

Change-Id: Id307542db67b103ec25b02b41fd3b9d9bd8f30f0
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2073582
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:       Nitin Kumbhar
Date:         2019-03-15 11:11:23 +05:30
Committed-by: mobile promotions
Parent:       e649d19c65
Commit:       03e137b552

20 changed files with 343 additions and 282 deletions


@@ -442,7 +442,8 @@ nvgpu-$(CONFIG_GK20A_VIDMEM) += \
common/mm/vidmem.o
nvgpu-y += \
hal/gr/config/gr_config_gm20b.o
hal/gr/config/gr_config_gm20b.o \
hal/gr/config/gr_config_gv100.o
nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
common/vgpu/ltc/ltc_vgpu.o \


@@ -387,7 +387,8 @@ ifeq ($(NVGPU_DEBUGGER),1)
srcs += common/debugger.c
endif
srcs += hal/gr/config/gr_config_gm20b.c
srcs += hal/gr/config/gr_config_gm20b.c \
hal/gr/config/gr_config_gv100.c
ifeq ($(NVGPU_LS_PMU),1)
# Add LS PMU files which are required for normal build


@@ -101,8 +101,8 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
return err;
}
if (g->ops.gr.init_sm_id_table != NULL) {
err = g->ops.gr.init_sm_id_table(g);
if (g->ops.gr.config.init_sm_id_table != NULL) {
err = g->ops.gr.config.init_sm_id_table(g);
if (err != 0) {
return err;
}


@@ -160,7 +160,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.suspend_contexts = vgpu_gr_suspend_contexts,
.resume_contexts = vgpu_gr_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
.commit_inst = vgpu_gr_commit_inst,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -282,6 +281,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
},
.config = {
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
},
.zbc = {
.add_color = NULL,


@@ -1151,11 +1151,11 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)
int vgpu_gr_init_fs_state(struct gk20a *g)
{
if (!g->ops.gr.init_sm_id_table) {
if (!g->ops.gr.config.init_sm_id_table) {
return -EINVAL;
}
return g->ops.gr.init_sm_id_table(g);
return g->ops.gr.config.init_sm_id_table(g);
}
int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable)


@@ -181,7 +181,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.suspend_contexts = vgpu_gr_suspend_contexts,
.resume_contexts = vgpu_gr_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
.commit_inst = vgpu_gr_commit_inst,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -329,6 +328,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
},
.config = {
.get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
.init_sm_id_table = vgpu_gr_init_sm_id_table,
},
.zbc = {
.add_color = NULL,


@@ -768,30 +768,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
return 0;
}
int gr_gk20a_init_sm_id_table(struct gk20a *g)
{
u32 gpc, tpc;
u32 sm_id = 0;
for (tpc = 0;
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
tpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
g->gr.sm_to_cluster[sm_id].sm_index = 0;
g->gr.sm_to_cluster[sm_id].global_tpc_index =
sm_id;
sm_id++;
}
}
}
g->gr.no_of_sm = sm_id;
return 0;
}
int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
{
struct gk20a *g = c->g;


@@ -457,8 +457,6 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
int gr_gk20a_clear_sm_errors(struct gk20a *g);
u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);
int gr_gk20a_init_sm_id_table(struct gk20a *g);
int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);


@@ -284,7 +284,6 @@ static const struct gpu_ops gm20b_ops = {
.suspend_contexts = gr_gk20a_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.commit_inst = gr_gk20a_commit_inst,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -399,6 +398,7 @@ static const struct gpu_ops gm20b_ops = {
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
.init_sm_id_table = gm20b_gr_config_init_sm_id_table,
},
.zbc = {
.add_color = gm20b_gr_zbc_add_color,


@@ -307,7 +307,6 @@ static const struct gpu_ops gp10b_ops = {
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gk20a_init_sm_id_table,
.commit_inst = gr_gk20a_commit_inst,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -445,6 +444,7 @@ static const struct gpu_ops gp10b_ops = {
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
.init_sm_id_table = gm20b_gr_config_init_sm_id_table,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {


@@ -40,250 +40,10 @@
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
#include <nvgpu/hw/gv100/hw_perf_gv100.h>
/*
* Estimate performance if the given logical TPC in the given logical GPC were
* removed.
*/
static int gr_gv100_scg_estimate_perf(struct gk20a *g,
unsigned long *gpc_tpc_mask,
u32 disable_gpc_id, u32 disable_tpc_id,
int *perf)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
u32 scale_factor = 512U; /* Use fx23.9 */
u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
u32 world_scale = 1024U; /* World performance in [19:10] */
u32 tpc_scale = 1U; /* TPC balancing in [9:0] */
u32 scg_num_pes = 0U;
u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
u32 deviation; /* absolute diff between TPC# and
* average_tpcs, averaged across GPCs
*/
u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */
u32 tpc_balance;
u32 scg_gpc_pix_perf;
u32 scg_world_perf;
u32 gpc_id;
u32 pes_id;
int diff;
bool is_tpc_removed_gpc = false;
bool is_tpc_removed_pes = false;
u32 max_tpc_gpc = 0U;
u32 num_tpc_mask;
u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
if (num_tpc_gpc == NULL) {
return -ENOMEM;
}
/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
num_tpc_mask = gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
/* Safety check if a TPC is removed twice */
if (is_tpc_removed_gpc) {
err = -EINVAL;
goto free_resources;
}
/* Remove logical TPC from set */
num_tpc_mask &= ~(BIT32(disable_tpc_id));
is_tpc_removed_gpc = true;
}
/* track balancing of tpcs across gpcs */
num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
average_tpcs += num_tpc_gpc[gpc_id];
/* save the maximum number of gpcs */
max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ?
num_tpc_gpc[gpc_id] : max_tpc_gpc;
/*
* Calculate ratio between TPC count and post-FS and post-SCG
*
* ratio represents relative throughput of the GPC
*/
scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
min_scg_gpc_pix_perf = scg_gpc_pix_perf;
}
/* Calculate # of surviving PES */
for (pes_id = 0;
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
pes_id++) {
/* Count the number of TPC on the set */
num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
gr->config, gpc_id, pes_id) &
gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
if (is_tpc_removed_pes) {
err = -EINVAL;
goto free_resources;
}
num_tpc_mask &= ~(BIT32(disable_tpc_id));
is_tpc_removed_pes = true;
}
if (hweight32(num_tpc_mask) != 0UL) {
scg_num_pes++;
}
}
}
if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
err = -EINVAL;
goto free_resources;
}
if (max_tpc_gpc == 0U) {
*perf = 0;
goto free_resources;
}
/* Now calculate perf */
scg_world_perf = (scale_factor * scg_num_pes) /
nvgpu_gr_config_get_ppc_count(gr->config);
deviation = 0;
average_tpcs = scale_factor * average_tpcs /
nvgpu_gr_config_get_gpc_count(gr->config);
for (gpc_id =0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
if (diff < 0) {
diff = -diff;
}
deviation += U32(diff);
}
deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
norm_tpc_deviation = deviation / max_tpc_gpc;
tpc_balance = scale_factor - norm_tpc_deviation;
if ((tpc_balance > scale_factor) ||
(scg_world_perf > scale_factor) ||
(min_scg_gpc_pix_perf > scale_factor) ||
(norm_tpc_deviation > scale_factor)) {
err = -EINVAL;
goto free_resources;
}
*perf = (pix_scale * min_scg_gpc_pix_perf) +
(world_scale * scg_world_perf) +
(tpc_scale * tpc_balance);
free_resources:
nvgpu_kfree(g, num_tpc_gpc);
return err;
}
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
{
}
int gr_gv100_init_sm_id_table(struct gk20a *g)
{
unsigned long tpc;
u32 gpc, sm, pes, gtpc;
u32 sm_id = 0;
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
struct gr_gk20a *gr = &g->gr;
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
int perf, maxperf;
int err = 0;
unsigned long *gpc_tpc_mask;
u32 *tpc_table, *gpc_table;
gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
if ((gpc_table == NULL) ||
(tpc_table == NULL) ||
(gpc_tpc_mask == NULL)) {
nvgpu_err(g, "Error allocating memory for sm tables");
err = -ENOMEM;
goto exit_build_table;
}
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (pes = 0;
pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
pes++) {
gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes);
}
}
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
maxperf = -1;
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
perf = -1;
err = gr_gv100_scg_estimate_perf(g,
gpc_tpc_mask, gpc, tpc, &perf);
if (err != 0) {
nvgpu_err(g,
"Error while estimating perf");
goto exit_build_table;
}
if (perf >= maxperf) {
maxperf = perf;
gpc_table[gtpc] = gpc;
tpc_table[gtpc] = tpc;
}
}
}
gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc]));
}
for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) {
for (sm = 0; sm < sm_per_tpc; sm++) {
u32 index = sm_id + sm;
g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc];
g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc];
g->gr.sm_to_cluster[index].sm_index = sm;
g->gr.sm_to_cluster[index].global_tpc_index = tpc;
nvgpu_log_info(g,
"gpc : %d tpc %d sm_index %d global_index: %d",
g->gr.sm_to_cluster[index].gpc_index,
g->gr.sm_to_cluster[index].tpc_index,
g->gr.sm_to_cluster[index].sm_index,
g->gr.sm_to_cluster[index].global_tpc_index);
}
}
g->gr.no_of_sm = num_sm;
nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm);
exit_build_table:
nvgpu_kfree(g, gpc_table);
nvgpu_kfree(g, tpc_table);
nvgpu_kfree(g, gpc_tpc_mask);
return err;
}
u32 gr_gv100_get_patch_slots(struct gk20a *g)
{
struct gr_gk20a *gr = &g->gr;


@@ -1,7 +1,7 @@
/*
* GV100 GPU GR
*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,6 @@
#define NVGPU_GR_GV100_H
void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
int gr_gv100_init_sm_id_table(struct gk20a *g);
void gr_gv100_program_sm_id_numbering(struct gk20a *g,
u32 gpc, u32 tpc, u32 smid);
int gr_gv100_load_smid_config(struct gk20a *g);


@@ -57,6 +57,7 @@
#include "hal/fifo/fifo_intr_gv11b.h"
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/config/gr_config_gm20b.h"
#include "hal/gr/config/gr_config_gv100.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/init/gr_init_gm20b.h"
@@ -421,7 +422,6 @@ static const struct gpu_ops gv100_ops = {
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -582,6 +582,7 @@ static const struct gpu_ops gv100_ops = {
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
.init_sm_id_table = gv100_gr_config_init_sm_id_table,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {


@@ -30,6 +30,7 @@
#include "hal/bus/bus_gm20b.h"
#include "hal/priv_ring/priv_ring_gm20b.h"
#include "hal/priv_ring/priv_ring_gp10b.h"
#include "hal/gr/config/gr_config_gv100.h"
#include "hal/power_features/cg/gv11b_gating_reglist.h"
#include "hal/cbc/cbc_gm20b.h"
#include "hal/cbc/cbc_gp10b.h"
@@ -372,7 +373,6 @@ static const struct gpu_ops gv11b_ops = {
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -542,6 +542,7 @@ static const struct gpu_ops gv11b_ops = {
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
.init_sm_id_table = gv100_gr_config_init_sm_id_table,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {


@@ -28,6 +28,30 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
{
u32 gpc, tpc;
u32 sm_id = 0;
for (tpc = 0;
tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
tpc++) {
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
g->gr.sm_to_cluster[sm_id].sm_index = 0;
g->gr.sm_to_cluster[sm_id].global_tpc_index =
sm_id;
sm_id++;
}
}
}
g->gr.no_of_sm = sm_id;
return 0;
}
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index)
{


@@ -28,6 +28,7 @@
struct gk20a;
struct nvgpu_gr_config;
int gm20b_gr_config_init_sm_id_table(struct gk20a *g);
u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config, u32 gpc_index);
u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,


@@ -0,0 +1,266 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/types.h>
#include <nvgpu/gr/config.h>
#include "gr_config_gv100.h"
/*
* Estimate performance if the given logical TPC in the given logical GPC were
* removed.
*/
static int gr_gv100_scg_estimate_perf(struct gk20a *g,
unsigned long *gpc_tpc_mask,
u32 disable_gpc_id, u32 disable_tpc_id,
int *perf)
{
struct gr_gk20a *gr = &g->gr;
int err = 0;
u32 scale_factor = 512U; /* Use fx23.9 */
u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
u32 world_scale = 1024U; /* World performance in [19:10] */
u32 tpc_scale = 1U; /* TPC balancing in [9:0] */
u32 scg_num_pes = 0U;
u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
u32 deviation; /* absolute diff between TPC# and
* average_tpcs, averaged across GPCs
*/
u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */
u32 tpc_balance;
u32 scg_gpc_pix_perf;
u32 scg_world_perf;
u32 gpc_id;
u32 pes_id;
int diff;
bool is_tpc_removed_gpc = false;
bool is_tpc_removed_pes = false;
u32 max_tpc_gpc = 0U;
u32 num_tpc_mask;
u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
if (num_tpc_gpc == NULL) {
return -ENOMEM;
}
/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
for (gpc_id = 0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
num_tpc_mask = gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
/* Safety check if a TPC is removed twice */
if (is_tpc_removed_gpc) {
err = -EINVAL;
goto free_resources;
}
/* Remove logical TPC from set */
num_tpc_mask &= ~(BIT32(disable_tpc_id));
is_tpc_removed_gpc = true;
}
/* track balancing of tpcs across gpcs */
num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
average_tpcs += num_tpc_gpc[gpc_id];
/* save the maximum number of gpcs */
max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ?
num_tpc_gpc[gpc_id] : max_tpc_gpc;
/*
* Calculate ratio between TPC count and post-FS and post-SCG
*
* ratio represents relative throughput of the GPC
*/
scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
min_scg_gpc_pix_perf = scg_gpc_pix_perf;
}
/* Calculate # of surviving PES */
for (pes_id = 0;
pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
pes_id++) {
/* Count the number of TPC on the set */
num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
gr->config, gpc_id, pes_id) &
gpc_tpc_mask[gpc_id];
if ((gpc_id == disable_gpc_id) &&
((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
if (is_tpc_removed_pes) {
err = -EINVAL;
goto free_resources;
}
num_tpc_mask &= ~(BIT32(disable_tpc_id));
is_tpc_removed_pes = true;
}
if (hweight32(num_tpc_mask) != 0UL) {
scg_num_pes++;
}
}
}
if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
err = -EINVAL;
goto free_resources;
}
if (max_tpc_gpc == 0U) {
*perf = 0;
goto free_resources;
}
/* Now calculate perf */
scg_world_perf = (scale_factor * scg_num_pes) /
nvgpu_gr_config_get_ppc_count(gr->config);
deviation = 0;
average_tpcs = scale_factor * average_tpcs /
nvgpu_gr_config_get_gpc_count(gr->config);
for (gpc_id =0;
gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
gpc_id++) {
diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
if (diff < 0) {
diff = -diff;
}
deviation += U32(diff);
}
deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
norm_tpc_deviation = deviation / max_tpc_gpc;
tpc_balance = scale_factor - norm_tpc_deviation;
if ((tpc_balance > scale_factor) ||
(scg_world_perf > scale_factor) ||
(min_scg_gpc_pix_perf > scale_factor) ||
(norm_tpc_deviation > scale_factor)) {
err = -EINVAL;
goto free_resources;
}
*perf = (pix_scale * min_scg_gpc_pix_perf) +
(world_scale * scg_world_perf) +
(tpc_scale * tpc_balance);
free_resources:
nvgpu_kfree(g, num_tpc_gpc);
return err;
}
int gv100_gr_config_init_sm_id_table(struct gk20a *g)
{
unsigned long tpc;
u32 gpc, sm, pes, gtpc;
u32 sm_id = 0;
u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
struct gr_gk20a *gr = &g->gr;
u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
int perf, maxperf;
int err = 0;
unsigned long *gpc_tpc_mask;
u32 *tpc_table, *gpc_table;
gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
sizeof(u32));
gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
if ((gpc_table == NULL) ||
(tpc_table == NULL) ||
(gpc_tpc_mask == NULL)) {
nvgpu_err(g, "Error allocating memory for sm tables");
err = -ENOMEM;
goto exit_build_table;
}
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for (pes = 0;
pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
pes++) {
gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
g->gr.config, gpc, pes);
}
}
for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
maxperf = -1;
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
perf = -1;
err = gr_gv100_scg_estimate_perf(g,
gpc_tpc_mask, gpc, tpc, &perf);
if (err != 0) {
nvgpu_err(g,
"Error while estimating perf");
goto exit_build_table;
}
if (perf >= maxperf) {
maxperf = perf;
gpc_table[gtpc] = gpc;
tpc_table[gtpc] = tpc;
}
}
}
gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc]));
}
for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) {
for (sm = 0; sm < sm_per_tpc; sm++) {
u32 index = sm_id + sm;
g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc];
g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc];
g->gr.sm_to_cluster[index].sm_index = sm;
g->gr.sm_to_cluster[index].global_tpc_index = tpc;
nvgpu_log_info(g,
"gpc : %d tpc %d sm_index %d global_index: %d",
g->gr.sm_to_cluster[index].gpc_index,
g->gr.sm_to_cluster[index].tpc_index,
g->gr.sm_to_cluster[index].sm_index,
g->gr.sm_to_cluster[index].global_tpc_index);
}
}
g->gr.no_of_sm = num_sm;
nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm);
exit_build_table:
nvgpu_kfree(g, gpc_table);
nvgpu_kfree(g, tpc_table);
nvgpu_kfree(g, gpc_tpc_mask);
return err;
}
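
For reference, the perf score built by gr_gv100_scg_estimate_perf()
packs three fixed-point terms; with scale_factor = 512 (fx23.9) each
term is at most 512 and therefore fits in 10 bits:

	perf = (1024 * 1024) * min_scg_gpc_pix_perf   /* bits [29:20] */
	     +  1024         * scg_world_perf         /* bits [19:10] */
	     +     1         * tpc_balance            /* bits  [9:0]  */

Compared with gm20b_gr_config_init_sm_id_table(), which simply walks
TPC indices across GPCs and records one SM per TPC,
gv100_gr_config_init_sm_id_table() builds the SM ID order greedily: at
each step it disables one candidate TPC, estimates the perf of the
remaining configuration, and assigns the next table slot to the TPC
that leaves the highest score.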


@@ -0,0 +1,32 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CONFIG_GV100_H
#define NVGPU_GR_CONFIG_GV100_H
#include <nvgpu/types.h>
struct gk20a;
int gv100_gr_config_init_sm_id_table(struct gk20a *g);
#endif /* NVGPU_GR_CONFIG_GV100_H */


@@ -415,7 +415,6 @@ struct gpu_ops {
u32 graphics_preempt_mode,
u32 compute_preempt_mode);
int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
int (*init_sm_id_table)(struct gk20a *g);
int (*init_sw_veid_bundle)(struct gk20a *g);
int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
int (*trigger_suspend)(struct gk20a *g);
@@ -582,6 +581,7 @@ struct gpu_ops {
struct nvgpu_gr_config *config, u32 gpc_index,
u32 pes_index);
u32 (*get_pd_dist_skip_table_size)(void);
int (*init_sm_id_table)(struct gk20a *g);
} config;
#ifdef CONFIG_GK20A_CTXSW_TRACE


@@ -61,6 +61,7 @@
#include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
#include "hal/gr/fecs_trace/fecs_trace_gv11b.h"
#include "hal/gr/config/gr_config_gm20b.h"
#include "hal/gr/config/gr_config_gv100.h"
#include "hal/gr/zbc/zbc_gp10b.h"
#include "hal/gr/zbc/zbc_gv11b.h"
#include "hal/gr/zcull/zcull_gm20b.h"
@@ -441,7 +442,6 @@ static const struct gpu_ops tu104_ops = {
.suspend_contexts = gr_gp10b_suspend_contexts,
.resume_contexts = gr_gk20a_resume_contexts,
.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
.init_sm_id_table = gr_gv100_init_sm_id_table,
.commit_inst = gr_gv11b_commit_inst,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -610,6 +610,7 @@ static const struct gpu_ops tu104_ops = {
.get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
.get_pd_dist_skip_table_size =
gm20b_gr_config_get_pd_dist_skip_table_size,
.init_sm_id_table = gv100_gr_config_init_sm_id_table,
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {