mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: move init_sm_id_table hal to hal.gr.config
Move init_sm_id_table hal to common.hal.gr.config. Two separate HALs are
added for gm20b and gv100.

JIRA NVGPU-1884

Change-Id: Id307542db67b103ec25b02b41fd3b9d9bd8f30f0
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2073582
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
parent e649d19c65
commit 03e137b552
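Every hunk below makes the same mechanical move: the init_sm_id_table
function pointer leaves the top-level g->ops.gr HAL table and lands in the
g->ops.gr.config sub-struct, so common code and each chip's hal_*.c file
reference the per-unit location. A minimal self-contained sketch of the
pattern (the sketch_* names are illustrative, not the real nvgpu types):

    struct gk20a;

    struct sketch_gpu_ops {
        struct {
            struct {
                /* chip-specific SM-id table builder */
                int (*init_sm_id_table)(struct gk20a *g);
            } config;
        } gr;
    };

    /* common code now dereferences only the new location */
    static int sketch_init_fs_state(struct gk20a *g,
                                    const struct sketch_gpu_ops *ops)
    {
        if (ops->gr.config.init_sm_id_table != NULL) {
            return ops->gr.config.init_sm_id_table(g);
        }
        return 0;
    }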
@@ -442,7 +442,8 @@ nvgpu-$(CONFIG_GK20A_VIDMEM) += \
     common/mm/vidmem.o

 nvgpu-y += \
-    hal/gr/config/gr_config_gm20b.o
+    hal/gr/config/gr_config_gm20b.o \
+    hal/gr/config/gr_config_gv100.o

 nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
     common/vgpu/ltc/ltc_vgpu.o \
@@ -387,7 +387,8 @@ ifeq ($(NVGPU_DEBUGGER),1)
 srcs += common/debugger.c
 endif

-srcs += hal/gr/config/gr_config_gm20b.c
+srcs += hal/gr/config/gr_config_gm20b.c \
+    hal/gr/config/gr_config_gv100.c

 ifeq ($(NVGPU_LS_PMU),1)
 # Add LS PMU files which are required for normal build
@@ -101,8 +101,8 @@ int nvgpu_gr_init_fs_state(struct gk20a *g)
         return err;
     }

-    if (g->ops.gr.init_sm_id_table != NULL) {
-        err = g->ops.gr.init_sm_id_table(g);
+    if (g->ops.gr.config.init_sm_id_table != NULL) {
+        err = g->ops.gr.config.init_sm_id_table(g);
         if (err != 0) {
             return err;
         }
@@ -160,7 +160,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .suspend_contexts = vgpu_gr_suspend_contexts,
         .resume_contexts = vgpu_gr_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = vgpu_gr_init_sm_id_table,
         .commit_inst = vgpu_gr_commit_inst,
         .trigger_suspend = NULL,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -282,6 +281,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         },
         .config = {
             .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
+            .init_sm_id_table = vgpu_gr_init_sm_id_table,
         },
         .zbc = {
             .add_color = NULL,
@@ -1151,11 +1151,11 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g)

 int vgpu_gr_init_fs_state(struct gk20a *g)
 {
-    if (!g->ops.gr.init_sm_id_table) {
+    if (!g->ops.gr.config.init_sm_id_table) {
         return -EINVAL;
     }

-    return g->ops.gr.init_sm_id_table(g);
+    return g->ops.gr.config.init_sm_id_table(g);
 }

 int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable)
@@ -181,7 +181,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .suspend_contexts = vgpu_gr_suspend_contexts,
         .resume_contexts = vgpu_gr_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = vgpu_gr_init_sm_id_table,
         .commit_inst = vgpu_gr_commit_inst,
         .trigger_suspend = NULL,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -329,6 +328,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         },
         .config = {
             .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
+            .init_sm_id_table = vgpu_gr_init_sm_id_table,
         },
         .zbc = {
             .add_color = NULL,
@@ -768,30 +768,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
     return 0;
 }

-int gr_gk20a_init_sm_id_table(struct gk20a *g)
-{
-    u32 gpc, tpc;
-    u32 sm_id = 0;
-
-    for (tpc = 0;
-         tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
-         tpc++) {
-        for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
-
-            if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
-                g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
-                g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
-                g->gr.sm_to_cluster[sm_id].sm_index = 0;
-                g->gr.sm_to_cluster[sm_id].global_tpc_index =
-                    sm_id;
-                sm_id++;
-            }
-        }
-    }
-    g->gr.no_of_sm = sm_id;
-    return 0;
-}
-
 int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
 {
     struct gk20a *g = c->g;
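The removed builder above is not dropped: it reappears verbatim as
gm20b_gr_config_init_sm_id_table in hal/gr/config/gr_config_gm20b.c later in
this diff. Its ordering is worth noting: TPC index is the outer loop, so
same-numbered TPCs across GPCs get adjacent SM ids. A standalone sketch,
assuming a hypothetical two-GPC topology with gpc_tpc_count = {2, 1}:

    #include <stdio.h>

    int main(void)
    {
        unsigned int gpc_tpc_count[2] = {2, 1}; /* assumed topology */
        unsigned int max_tpc_per_gpc = 2;
        unsigned int sm_id = 0, gpc, tpc;

        for (tpc = 0; tpc < max_tpc_per_gpc; tpc++) {  /* outer: TPC slot */
            for (gpc = 0; gpc < 2; gpc++) {            /* inner: GPC */
                if (tpc < gpc_tpc_count[gpc]) {
                    printf("sm_id %u -> gpc %u tpc %u\n", sm_id, gpc, tpc);
                    sm_id++;
                }
            }
        }
        /* prints: 0 -> gpc 0 tpc 0, 1 -> gpc 1 tpc 0, 2 -> gpc 0 tpc 1;
         * no_of_sm ends up 3, since the builder assigns one SM per TPC
         * (sm_index is always 0). */
        return 0;
    }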
@@ -457,8 +457,6 @@ int gr_gk20a_resume_from_pause(struct gk20a *g);
 int gr_gk20a_clear_sm_errors(struct gk20a *g);
 u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g);

-int gr_gk20a_init_sm_id_table(struct gk20a *g);
-
 int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);

 u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
@@ -284,7 +284,6 @@ static const struct gpu_ops gm20b_ops = {
         .suspend_contexts = gr_gk20a_suspend_contexts,
         .resume_contexts = gr_gk20a_resume_contexts,
         .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags,
-        .init_sm_id_table = gr_gk20a_init_sm_id_table,
         .commit_inst = gr_gk20a_commit_inst,
         .trigger_suspend = gr_gk20a_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -399,6 +398,7 @@ static const struct gpu_ops gm20b_ops = {
             .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
             .get_pd_dist_skip_table_size =
                 gm20b_gr_config_get_pd_dist_skip_table_size,
+            .init_sm_id_table = gm20b_gr_config_init_sm_id_table,
         },
         .zbc = {
             .add_color = gm20b_gr_zbc_add_color,
@@ -307,7 +307,6 @@ static const struct gpu_ops gp10b_ops = {
         .suspend_contexts = gr_gp10b_suspend_contexts,
         .resume_contexts = gr_gk20a_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = gr_gk20a_init_sm_id_table,
         .commit_inst = gr_gk20a_commit_inst,
         .trigger_suspend = gr_gk20a_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -445,6 +444,7 @@ static const struct gpu_ops gp10b_ops = {
             .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
             .get_pd_dist_skip_table_size =
                 gm20b_gr_config_get_pd_dist_skip_table_size,
+            .init_sm_id_table = gm20b_gr_config_init_sm_id_table,
         },
 #ifdef CONFIG_GK20A_CTXSW_TRACE
         .fecs_trace = {
@@ -40,250 +40,10 @@
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>
 #include <nvgpu/hw/gv100/hw_perf_gv100.h>

-/*
- * Estimate performance if the given logical TPC in the given logical GPC were
- * removed.
- */
-static int gr_gv100_scg_estimate_perf(struct gk20a *g,
-    unsigned long *gpc_tpc_mask,
-    u32 disable_gpc_id, u32 disable_tpc_id,
-    int *perf)
-{
-    struct gr_gk20a *gr = &g->gr;
-    int err = 0;
-    u32 scale_factor = 512U; /* Use fx23.9 */
-    u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
-    u32 world_scale = 1024U; /* World performance in [19:10] */
-    u32 tpc_scale = 1U; /* TPC balancing in [9:0] */
-    u32 scg_num_pes = 0U;
-    u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
-    u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
-    u32 deviation; /* absolute diff between TPC# and
-                    * average_tpcs, averaged across GPCs
-                    */
-    u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */
-    u32 tpc_balance;
-    u32 scg_gpc_pix_perf;
-    u32 scg_world_perf;
-    u32 gpc_id;
-    u32 pes_id;
-    int diff;
-    bool is_tpc_removed_gpc = false;
-    bool is_tpc_removed_pes = false;
-    u32 max_tpc_gpc = 0U;
-    u32 num_tpc_mask;
-    u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
-        nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
-
-    if (num_tpc_gpc == NULL) {
-        return -ENOMEM;
-    }
-
-    /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
-    for (gpc_id = 0;
-         gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
-         gpc_id++) {
-        num_tpc_mask = gpc_tpc_mask[gpc_id];
-
-        if ((gpc_id == disable_gpc_id) &&
-            ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
-            /* Safety check if a TPC is removed twice */
-            if (is_tpc_removed_gpc) {
-                err = -EINVAL;
-                goto free_resources;
-            }
-            /* Remove logical TPC from set */
-            num_tpc_mask &= ~(BIT32(disable_tpc_id));
-            is_tpc_removed_gpc = true;
-        }
-
-        /* track balancing of tpcs across gpcs */
-        num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
-        average_tpcs += num_tpc_gpc[gpc_id];
-
-        /* save the maximum number of TPCs per GPC */
-        max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ?
-            num_tpc_gpc[gpc_id] : max_tpc_gpc;
-
-        /*
-         * Calculate ratio between TPC count post-FS and post-SCG
-         *
-         * ratio represents relative throughput of the GPC
-         */
-        scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
-            nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
-
-        if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
-            min_scg_gpc_pix_perf = scg_gpc_pix_perf;
-        }
-
-        /* Calculate # of surviving PES */
-        for (pes_id = 0;
-             pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
-             pes_id++) {
-            /* Count the number of TPC on the set */
-            num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
-                gr->config, gpc_id, pes_id) &
-                gpc_tpc_mask[gpc_id];
-
-            if ((gpc_id == disable_gpc_id) &&
-                ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
-
-                if (is_tpc_removed_pes) {
-                    err = -EINVAL;
-                    goto free_resources;
-                }
-                num_tpc_mask &= ~(BIT32(disable_tpc_id));
-                is_tpc_removed_pes = true;
-            }
-            if (hweight32(num_tpc_mask) != 0UL) {
-                scg_num_pes++;
-            }
-        }
-    }
-
-    if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
-        err = -EINVAL;
-        goto free_resources;
-    }
-
-    if (max_tpc_gpc == 0U) {
-        *perf = 0;
-        goto free_resources;
-    }
-
-    /* Now calculate perf */
-    scg_world_perf = (scale_factor * scg_num_pes) /
-        nvgpu_gr_config_get_ppc_count(gr->config);
-    deviation = 0;
-    average_tpcs = scale_factor * average_tpcs /
-        nvgpu_gr_config_get_gpc_count(gr->config);
-    for (gpc_id = 0;
-         gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
-         gpc_id++) {
-        diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
-        if (diff < 0) {
-            diff = -diff;
-        }
-        deviation += U32(diff);
-    }
-
-    deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
-
-    norm_tpc_deviation = deviation / max_tpc_gpc;
-
-    tpc_balance = scale_factor - norm_tpc_deviation;
-
-    if ((tpc_balance > scale_factor) ||
-        (scg_world_perf > scale_factor) ||
-        (min_scg_gpc_pix_perf > scale_factor) ||
-        (norm_tpc_deviation > scale_factor)) {
-        err = -EINVAL;
-        goto free_resources;
-    }
-
-    *perf = (pix_scale * min_scg_gpc_pix_perf) +
-        (world_scale * scg_world_perf) +
-        (tpc_scale * tpc_balance);
-free_resources:
-    nvgpu_kfree(g, num_tpc_gpc);
-    return err;
-}
-
 void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 }

-int gr_gv100_init_sm_id_table(struct gk20a *g)
-{
-    unsigned long tpc;
-    u32 gpc, sm, pes, gtpc;
-    u32 sm_id = 0;
-    u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
-    struct gr_gk20a *gr = &g->gr;
-    u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
-    int perf, maxperf;
-    int err = 0;
-    unsigned long *gpc_tpc_mask;
-    u32 *tpc_table, *gpc_table;
-
-    gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
-        sizeof(u32));
-    tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
-        sizeof(u32));
-    gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
-        nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
-
-    if ((gpc_table == NULL) ||
-        (tpc_table == NULL) ||
-        (gpc_tpc_mask == NULL)) {
-        nvgpu_err(g, "Error allocating memory for sm tables");
-        err = -ENOMEM;
-        goto exit_build_table;
-    }
-
-    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
-        for (pes = 0;
-             pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
-             pes++) {
-            gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
-                g->gr.config, gpc, pes);
-        }
-    }
-
-    for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
-        maxperf = -1;
-        for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
-            for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
-                nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
-                perf = -1;
-                err = gr_gv100_scg_estimate_perf(g,
-                    gpc_tpc_mask, gpc, tpc, &perf);
-
-                if (err != 0) {
-                    nvgpu_err(g,
-                        "Error while estimating perf");
-                    goto exit_build_table;
-                }
-
-                if (perf >= maxperf) {
-                    maxperf = perf;
-                    gpc_table[gtpc] = gpc;
-                    tpc_table[gtpc] = tpc;
-                }
-            }
-        }
-        gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc]));
-    }
-
-    for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) {
-        for (sm = 0; sm < sm_per_tpc; sm++) {
-            u32 index = sm_id + sm;
-
-            g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc];
-            g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc];
-            g->gr.sm_to_cluster[index].sm_index = sm;
-            g->gr.sm_to_cluster[index].global_tpc_index = tpc;
-            nvgpu_log_info(g,
-                "gpc : %d tpc %d sm_index %d global_index: %d",
-                g->gr.sm_to_cluster[index].gpc_index,
-                g->gr.sm_to_cluster[index].tpc_index,
-                g->gr.sm_to_cluster[index].sm_index,
-                g->gr.sm_to_cluster[index].global_tpc_index);
-        }
-    }
-
-    g->gr.no_of_sm = num_sm;
-    nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm);
-exit_build_table:
-    nvgpu_kfree(g, gpc_table);
-    nvgpu_kfree(g, tpc_table);
-    nvgpu_kfree(g, gpc_tpc_mask);
-    return err;
-}
-
 u32 gr_gv100_get_patch_slots(struct gk20a *g)
 {
     struct gr_gk20a *gr = &g->gr;
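The SCG estimator above packs three bounded terms into a single integer
score. Each term is capped at scale_factor = 512 (fx23.9 fixed point), so
weighting by pix_scale (2^20), world_scale (2^10) and tpc_scale (2^0) places
pixel perf in bits [29:20], world perf in bits [19:10] and TPC balance in
bits [9:0]; comparing two scores therefore ranks candidates by pixel perf
first, world perf second and TPC balance last. A standalone arithmetic
check, with made-up term values:

    #include <stdio.h>

    int main(void)
    {
        unsigned int pix_scale = 1024u * 1024u; /* bits [29:20] */
        unsigned int world_scale = 1024u;       /* bits [19:10] */
        unsigned int tpc_scale = 1u;            /* bits [9:0]   */
        /* assumed term values, each bounded by scale_factor = 512 */
        unsigned int min_pix = 256u, world = 384u, balance = 500u;

        int perf = (int)(pix_scale * min_pix + world_scale * world +
                         tpc_scale * balance);

        /* 0x10000000 + 0x00060000 + 0x000001f4 = 0x100601f4 */
        printf("perf = %d (0x%08x)\n", perf, (unsigned int)perf);
        return 0;
    }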
@@ -1,7 +1,7 @@
 /*
  * GV100 GPU GR
  *
- * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -26,7 +26,6 @@
 #define NVGPU_GR_GV100_H

 void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-int gr_gv100_init_sm_id_table(struct gk20a *g);
 void gr_gv100_program_sm_id_numbering(struct gk20a *g,
     u32 gpc, u32 tpc, u32 smid);
 int gr_gv100_load_smid_config(struct gk20a *g);
@@ -57,6 +57,7 @@
 #include "hal/fifo/fifo_intr_gv11b.h"
 #include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
 #include "hal/gr/config/gr_config_gm20b.h"
+#include "hal/gr/config/gr_config_gv100.h"
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
 #include "hal/gr/init/gr_init_gm20b.h"
@@ -421,7 +422,6 @@ static const struct gpu_ops gv100_ops = {
         .suspend_contexts = gr_gp10b_suspend_contexts,
         .resume_contexts = gr_gk20a_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = gr_gv100_init_sm_id_table,
         .commit_inst = gr_gv11b_commit_inst,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -582,6 +582,7 @@ static const struct gpu_ops gv100_ops = {
             .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
             .get_pd_dist_skip_table_size =
                 gm20b_gr_config_get_pd_dist_skip_table_size,
+            .init_sm_id_table = gv100_gr_config_init_sm_id_table,
         },
 #ifdef CONFIG_GK20A_CTXSW_TRACE
         .fecs_trace = {
@@ -30,6 +30,7 @@
 #include "hal/bus/bus_gm20b.h"
 #include "hal/priv_ring/priv_ring_gm20b.h"
 #include "hal/priv_ring/priv_ring_gp10b.h"
+#include "hal/gr/config/gr_config_gv100.h"
 #include "hal/power_features/cg/gv11b_gating_reglist.h"
 #include "hal/cbc/cbc_gm20b.h"
 #include "hal/cbc/cbc_gp10b.h"
@@ -372,7 +373,6 @@ static const struct gpu_ops gv11b_ops = {
         .suspend_contexts = gr_gp10b_suspend_contexts,
         .resume_contexts = gr_gk20a_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = gr_gv100_init_sm_id_table,
         .commit_inst = gr_gv11b_commit_inst,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -542,6 +542,7 @@ static const struct gpu_ops gv11b_ops = {
             .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
             .get_pd_dist_skip_table_size =
                 gm20b_gr_config_get_pd_dist_skip_table_size,
+            .init_sm_id_table = gv100_gr_config_init_sm_id_table,
         },
 #ifdef CONFIG_GK20A_CTXSW_TRACE
         .fecs_trace = {
@@ -28,6 +28,30 @@

 #include <nvgpu/hw/gm20b/hw_gr_gm20b.h>

+int gm20b_gr_config_init_sm_id_table(struct gk20a *g)
+{
+    u32 gpc, tpc;
+    u32 sm_id = 0;
+
+    for (tpc = 0;
+         tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config);
+         tpc++) {
+        for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) {
+
+            if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
+                g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
+                g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
+                g->gr.sm_to_cluster[sm_id].sm_index = 0;
+                g->gr.sm_to_cluster[sm_id].global_tpc_index =
+                    sm_id;
+                sm_id++;
+            }
+        }
+    }
+    g->gr.no_of_sm = sm_id;
+    return 0;
+}
+
 u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index)
 {
@@ -28,6 +28,7 @@
 struct gk20a;
 struct nvgpu_gr_config;

+int gm20b_gr_config_init_sm_id_table(struct gk20a *g);
 u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index);
 u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c (new file, 266 lines)
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/types.h>
+#include <nvgpu/gr/config.h>
+
+#include "gr_config_gv100.h"
+
+/*
+ * Estimate performance if the given logical TPC in the given logical GPC were
+ * removed.
+ */
+static int gr_gv100_scg_estimate_perf(struct gk20a *g,
+    unsigned long *gpc_tpc_mask,
+    u32 disable_gpc_id, u32 disable_tpc_id,
+    int *perf)
+{
+    struct gr_gk20a *gr = &g->gr;
+    int err = 0;
+    u32 scale_factor = 512U; /* Use fx23.9 */
+    u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */
+    u32 world_scale = 1024U; /* World performance in [19:10] */
+    u32 tpc_scale = 1U; /* TPC balancing in [9:0] */
+    u32 scg_num_pes = 0U;
+    u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
+    u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */
+    u32 deviation; /* absolute diff between TPC# and
+                    * average_tpcs, averaged across GPCs
+                    */
+    u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */
+    u32 tpc_balance;
+    u32 scg_gpc_pix_perf;
+    u32 scg_world_perf;
+    u32 gpc_id;
+    u32 pes_id;
+    int diff;
+    bool is_tpc_removed_gpc = false;
+    bool is_tpc_removed_pes = false;
+    u32 max_tpc_gpc = 0U;
+    u32 num_tpc_mask;
+    u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) *
+        nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+
+    if (num_tpc_gpc == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
+    for (gpc_id = 0;
+         gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
+         gpc_id++) {
+        num_tpc_mask = gpc_tpc_mask[gpc_id];
+
+        if ((gpc_id == disable_gpc_id) &&
+            ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
+            /* Safety check if a TPC is removed twice */
+            if (is_tpc_removed_gpc) {
+                err = -EINVAL;
+                goto free_resources;
+            }
+            /* Remove logical TPC from set */
+            num_tpc_mask &= ~(BIT32(disable_tpc_id));
+            is_tpc_removed_gpc = true;
+        }
+
+        /* track balancing of tpcs across gpcs */
+        num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask);
+        average_tpcs += num_tpc_gpc[gpc_id];
+
+        /* save the maximum number of TPCs per GPC */
+        max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ?
+            num_tpc_gpc[gpc_id] : max_tpc_gpc;
+
+        /*
+         * Calculate ratio between TPC count post-FS and post-SCG
+         *
+         * ratio represents relative throughput of the GPC
+         */
+        scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] /
+            nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id);
+
+        if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) {
+            min_scg_gpc_pix_perf = scg_gpc_pix_perf;
+        }
+
+        /* Calculate # of surviving PES */
+        for (pes_id = 0;
+             pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id);
+             pes_id++) {
+            /* Count the number of TPC on the set */
+            num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask(
+                gr->config, gpc_id, pes_id) &
+                gpc_tpc_mask[gpc_id];
+
+            if ((gpc_id == disable_gpc_id) &&
+                ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) {
+
+                if (is_tpc_removed_pes) {
+                    err = -EINVAL;
+                    goto free_resources;
+                }
+                num_tpc_mask &= ~(BIT32(disable_tpc_id));
+                is_tpc_removed_pes = true;
+            }
+            if (hweight32(num_tpc_mask) != 0UL) {
+                scg_num_pes++;
+            }
+        }
+    }
+
+    if (!is_tpc_removed_gpc || !is_tpc_removed_pes) {
+        err = -EINVAL;
+        goto free_resources;
+    }
+
+    if (max_tpc_gpc == 0U) {
+        *perf = 0;
+        goto free_resources;
+    }
+
+    /* Now calculate perf */
+    scg_world_perf = (scale_factor * scg_num_pes) /
+        nvgpu_gr_config_get_ppc_count(gr->config);
+    deviation = 0;
+    average_tpcs = scale_factor * average_tpcs /
+        nvgpu_gr_config_get_gpc_count(gr->config);
+    for (gpc_id = 0;
+         gpc_id < nvgpu_gr_config_get_gpc_count(gr->config);
+         gpc_id++) {
+        diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id];
+        if (diff < 0) {
+            diff = -diff;
+        }
+        deviation += U32(diff);
+    }
+
+    deviation /= nvgpu_gr_config_get_gpc_count(gr->config);
+
+    norm_tpc_deviation = deviation / max_tpc_gpc;
+
+    tpc_balance = scale_factor - norm_tpc_deviation;
+
+    if ((tpc_balance > scale_factor) ||
+        (scg_world_perf > scale_factor) ||
+        (min_scg_gpc_pix_perf > scale_factor) ||
+        (norm_tpc_deviation > scale_factor)) {
+        err = -EINVAL;
+        goto free_resources;
+    }
+
+    *perf = (pix_scale * min_scg_gpc_pix_perf) +
+        (world_scale * scg_world_perf) +
+        (tpc_scale * tpc_balance);
+free_resources:
+    nvgpu_kfree(g, num_tpc_gpc);
+    return err;
+}
+
+int gv100_gr_config_init_sm_id_table(struct gk20a *g)
+{
+    unsigned long tpc;
+    u32 gpc, sm, pes, gtpc;
+    u32 sm_id = 0;
+    u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
+    struct gr_gk20a *gr = &g->gr;
+    u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config);
+    int perf, maxperf;
+    int err = 0;
+    unsigned long *gpc_tpc_mask;
+    u32 *tpc_table, *gpc_table;
+
+    gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
+        sizeof(u32));
+    tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) *
+        sizeof(u32));
+    gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) *
+        nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS));
+
+    if ((gpc_table == NULL) ||
+        (tpc_table == NULL) ||
+        (gpc_tpc_mask == NULL)) {
+        nvgpu_err(g, "Error allocating memory for sm tables");
+        err = -ENOMEM;
+        goto exit_build_table;
+    }
+
+    for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
+        for (pes = 0;
+             pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc);
+             pes++) {
+            gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask(
+                g->gr.config, gpc, pes);
+        }
+    }
+
+    for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) {
+        maxperf = -1;
+        for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
+            for_each_set_bit(tpc, &gpc_tpc_mask[gpc],
+                nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) {
+                perf = -1;
+                err = gr_gv100_scg_estimate_perf(g,
+                    gpc_tpc_mask, gpc, tpc, &perf);
+
+                if (err != 0) {
+                    nvgpu_err(g,
+                        "Error while estimating perf");
+                    goto exit_build_table;
+                }
+
+                if (perf >= maxperf) {
+                    maxperf = perf;
+                    gpc_table[gtpc] = gpc;
+                    tpc_table[gtpc] = tpc;
+                }
+            }
+        }
+        gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc]));
+    }
+
+    for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) {
+        for (sm = 0; sm < sm_per_tpc; sm++) {
+            u32 index = sm_id + sm;
+
+            g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc];
+            g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc];
+            g->gr.sm_to_cluster[index].sm_index = sm;
+            g->gr.sm_to_cluster[index].global_tpc_index = tpc;
+            nvgpu_log_info(g,
+                "gpc : %d tpc %d sm_index %d global_index: %d",
+                g->gr.sm_to_cluster[index].gpc_index,
+                g->gr.sm_to_cluster[index].tpc_index,
+                g->gr.sm_to_cluster[index].sm_index,
+                g->gr.sm_to_cluster[index].global_tpc_index);
+        }
+    }
+
+    g->gr.no_of_sm = num_sm;
+    nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm);
+exit_build_table:
+    nvgpu_kfree(g, gpc_table);
+    nvgpu_kfree(g, tpc_table);
+    nvgpu_kfree(g, gpc_tpc_mask);
+    return err;
+}
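With gr_config_gv100.c in place, the gv100, gv11b and tu104 ops tables above
hook the relocated op as .config.init_sm_id_table =
gv100_gr_config_init_sm_id_table, while gm20b and gp10b keep the simple
round-robin gm20b_gr_config_init_sm_id_table; the header added next exports
the gv100 symbol to those hal_*.c files.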
drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h (new file, 32 lines)
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_GR_CONFIG_GV100_H
+#define NVGPU_GR_CONFIG_GV100_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+
+int gv100_gr_config_init_sm_id_table(struct gk20a *g);
+
+#endif /* NVGPU_GR_CONFIG_GV100_H */
@@ -415,7 +415,6 @@ struct gpu_ops {
             u32 graphics_preempt_mode,
             u32 compute_preempt_mode);
         int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
-        int (*init_sm_id_table)(struct gk20a *g);
         int (*init_sw_veid_bundle)(struct gk20a *g);
         int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
         int (*trigger_suspend)(struct gk20a *g);
@@ -582,6 +581,7 @@ struct gpu_ops {
                 struct nvgpu_gr_config *config, u32 gpc_index,
                 u32 pes_index);
             u32 (*get_pd_dist_skip_table_size)(void);
+            int (*init_sm_id_table)(struct gk20a *g);
         } config;

 #ifdef CONFIG_GK20A_CTXSW_TRACE
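The struct gpu_ops hunk above is the interface change the rest of the diff
keys off: the op pointer now sits in the gr.config sub-struct next to the
other per-GPC/TPC topology queries such as get_gpc_tpc_mask,
get_pes_tpc_mask and get_pd_dist_skip_table_size.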
@@ -61,6 +61,7 @@
 #include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
 #include "hal/gr/fecs_trace/fecs_trace_gv11b.h"
 #include "hal/gr/config/gr_config_gm20b.h"
+#include "hal/gr/config/gr_config_gv100.h"
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
 #include "hal/gr/zcull/zcull_gm20b.h"
@@ -441,7 +442,6 @@ static const struct gpu_ops tu104_ops = {
         .suspend_contexts = gr_gp10b_suspend_contexts,
         .resume_contexts = gr_gk20a_resume_contexts,
         .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags,
-        .init_sm_id_table = gr_gv100_init_sm_id_table,
         .commit_inst = gr_gv11b_commit_inst,
         .trigger_suspend = gv11b_gr_sm_trigger_suspend,
         .wait_for_pause = gr_gk20a_wait_for_pause,
@@ -610,6 +610,7 @@ static const struct gpu_ops tu104_ops = {
             .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
             .get_pd_dist_skip_table_size =
                 gm20b_gr_config_get_pd_dist_skip_table_size,
+            .init_sm_id_table = gv100_gr_config_init_sm_id_table,
         },
 #ifdef CONFIG_GK20A_CTXSW_TRACE
         .fecs_trace = {