diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 8301dfcab..053c277da 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -442,7 +442,8 @@ nvgpu-$(CONFIG_GK20A_VIDMEM) += \ common/mm/vidmem.o nvgpu-y += \ - hal/gr/config/gr_config_gm20b.o + hal/gr/config/gr_config_gm20b.o \ + hal/gr/config/gr_config_gv100.o nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ common/vgpu/ltc/ltc_vgpu.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index de8f708a5..9876aade9 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -387,7 +387,8 @@ ifeq ($(NVGPU_DEBUGGER),1) srcs += common/debugger.c endif -srcs += hal/gr/config/gr_config_gm20b.c +srcs += hal/gr/config/gr_config_gm20b.c \ + hal/gr/config/gr_config_gv100.c ifeq ($(NVGPU_LS_PMU),1) # Add LS PMU files which are required for normal build diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index 8867abbd7..8664e1db8 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -101,8 +101,8 @@ int nvgpu_gr_init_fs_state(struct gk20a *g) return err; } - if (g->ops.gr.init_sm_id_table != NULL) { - err = g->ops.gr.init_sm_id_table(g); + if (g->ops.gr.config.init_sm_id_table != NULL) { + err = g->ops.gr.config.init_sm_id_table(g); if (err != 0) { return err; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index d6875fad4..aac686afc 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -160,7 +160,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .suspend_contexts = vgpu_gr_suspend_contexts, .resume_contexts = vgpu_gr_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - .init_sm_id_table = vgpu_gr_init_sm_id_table, .commit_inst = vgpu_gr_commit_inst, .trigger_suspend = NULL, 
.wait_for_pause = gr_gk20a_wait_for_pause, @@ -282,6 +281,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { }, .config = { .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, + .init_sm_id_table = vgpu_gr_init_sm_id_table, }, .zbc = { .add_color = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index e31397edb..c83f152c4 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -1151,11 +1151,11 @@ int vgpu_gr_init_sm_id_table(struct gk20a *g) int vgpu_gr_init_fs_state(struct gk20a *g) { - if (!g->ops.gr.init_sm_id_table) { + if (!g->ops.gr.config.init_sm_id_table) { return -EINVAL; } - return g->ops.gr.init_sm_id_table(g); + return g->ops.gr.config.init_sm_id_table(g); } int vgpu_gr_update_pc_sampling(struct channel_gk20a *ch, bool enable) diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index c457a6fe2..bf0ba0e8f 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -181,7 +181,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .suspend_contexts = vgpu_gr_suspend_contexts, .resume_contexts = vgpu_gr_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - .init_sm_id_table = vgpu_gr_init_sm_id_table, .commit_inst = vgpu_gr_commit_inst, .trigger_suspend = NULL, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -329,6 +328,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { }, .config = { .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, + .init_sm_id_table = vgpu_gr_init_sm_id_table, }, .zbc = { .add_color = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index fb06865dd..b6bf5929d 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -768,30 +768,6 @@ int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, return 0; } 
-int gr_gk20a_init_sm_id_table(struct gk20a *g) -{ - u32 gpc, tpc; - u32 sm_id = 0; - - for (tpc = 0; - tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); - tpc++) { - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { - - if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { - g->gr.sm_to_cluster[sm_id].tpc_index = tpc; - g->gr.sm_to_cluster[sm_id].gpc_index = gpc; - g->gr.sm_to_cluster[sm_id].sm_index = 0; - g->gr.sm_to_cluster[sm_id].global_tpc_index = - sm_id; - sm_id++; - } - } - } - g->gr.no_of_sm = sm_id; - return 0; -} - int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type) { struct gk20a *g = c->g; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 19065702d..e9f75bef6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -457,8 +457,6 @@ int gr_gk20a_resume_from_pause(struct gk20a *g); int gr_gk20a_clear_sm_errors(struct gk20a *g); u32 gr_gk20a_tpc_enabled_exceptions(struct gk20a *g); -int gr_gk20a_init_sm_id_table(struct gk20a *g); - int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index bee6cf985..c4f2303c6 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -284,7 +284,6 @@ static const struct gpu_ops gm20b_ops = { .suspend_contexts = gr_gk20a_suspend_contexts, .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags, - .init_sm_id_table = gr_gk20a_init_sm_id_table, .commit_inst = gr_gk20a_commit_inst, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -399,6 +398,7 @@ static const struct gpu_ops gm20b_ops = { .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, .get_pd_dist_skip_table_size = 
gm20b_gr_config_get_pd_dist_skip_table_size, + .init_sm_id_table = gm20b_gr_config_init_sm_id_table, }, .zbc = { .add_color = gm20b_gr_zbc_add_color, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index d0520684f..0a191ac7c 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -307,7 +307,6 @@ static const struct gpu_ops gp10b_ops = { .suspend_contexts = gr_gp10b_suspend_contexts, .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - .init_sm_id_table = gr_gk20a_init_sm_id_table, .commit_inst = gr_gk20a_commit_inst, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -445,6 +444,7 @@ static const struct gpu_ops gp10b_ops = { .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, .get_pd_dist_skip_table_size = gm20b_gr_config_get_pd_dist_skip_table_size, + .init_sm_id_table = gm20b_gr_config_init_sm_id_table, }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index 53a65839a..3628fa1ea 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -40,250 +40,10 @@ #include #include - -/* - * Estimate performance if the given logical TPC in the given logical GPC were - * removed. 
- */ -static int gr_gv100_scg_estimate_perf(struct gk20a *g, - unsigned long *gpc_tpc_mask, - u32 disable_gpc_id, u32 disable_tpc_id, - int *perf) -{ - struct gr_gk20a *gr = &g->gr; - int err = 0; - u32 scale_factor = 512U; /* Use fx23.9 */ - u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */ - u32 world_scale = 1024U; /* World performance in [19:10] */ - u32 tpc_scale = 1U; /* TPC balancing in [9:0] */ - u32 scg_num_pes = 0U; - u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ - u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */ - u32 deviation; /* absolute diff between TPC# and - * average_tpcs, averaged across GPCs - */ - u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ - u32 tpc_balance; - u32 scg_gpc_pix_perf; - u32 scg_world_perf; - u32 gpc_id; - u32 pes_id; - int diff; - bool is_tpc_removed_gpc = false; - bool is_tpc_removed_pes = false; - u32 max_tpc_gpc = 0U; - u32 num_tpc_mask; - u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) * - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); - - if (num_tpc_gpc == NULL) { - return -ENOMEM; - } - - /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ - for (gpc_id = 0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); - gpc_id++) { - num_tpc_mask = gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - /* Safety check if a TPC is removed twice */ - if (is_tpc_removed_gpc) { - err = -EINVAL; - goto free_resources; - } - /* Remove logical TPC from set */ - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_gpc = true; - } - - /* track balancing of tpcs across gpcs */ - num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask); - average_tpcs += num_tpc_gpc[gpc_id]; - - /* save the maximum numer of gpcs */ - max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ? 
- num_tpc_gpc[gpc_id] : max_tpc_gpc; - - /* - * Calculate ratio between TPC count and post-FS and post-SCG - * - * ratio represents relative throughput of the GPC - */ - scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] / - nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id); - - if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { - min_scg_gpc_pix_perf = scg_gpc_pix_perf; - } - - /* Calculate # of surviving PES */ - for (pes_id = 0; - pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id); - pes_id++) { - /* Count the number of TPC on the set */ - num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( - gr->config, gpc_id, pes_id) & - gpc_tpc_mask[gpc_id]; - - if ((gpc_id == disable_gpc_id) && - ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { - - if (is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - num_tpc_mask &= ~(BIT32(disable_tpc_id)); - is_tpc_removed_pes = true; - } - if (hweight32(num_tpc_mask) != 0UL) { - scg_num_pes++; - } - } - } - - if (!is_tpc_removed_gpc || !is_tpc_removed_pes) { - err = -EINVAL; - goto free_resources; - } - - if (max_tpc_gpc == 0U) { - *perf = 0; - goto free_resources; - } - - /* Now calculate perf */ - scg_world_perf = (scale_factor * scg_num_pes) / - nvgpu_gr_config_get_ppc_count(gr->config); - deviation = 0; - average_tpcs = scale_factor * average_tpcs / - nvgpu_gr_config_get_gpc_count(gr->config); - for (gpc_id =0; - gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); - gpc_id++) { - diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id]; - if (diff < 0) { - diff = -diff; - } - deviation += U32(diff); - } - - deviation /= nvgpu_gr_config_get_gpc_count(gr->config); - - norm_tpc_deviation = deviation / max_tpc_gpc; - - tpc_balance = scale_factor - norm_tpc_deviation; - - if ((tpc_balance > scale_factor) || - (scg_world_perf > scale_factor) || - (min_scg_gpc_pix_perf > scale_factor) || - (norm_tpc_deviation > scale_factor)) { - err = -EINVAL; - goto free_resources; - } - - *perf = (pix_scale * 
min_scg_gpc_pix_perf) + - (world_scale * scg_world_perf) + - (tpc_scale * tpc_balance); -free_resources: - nvgpu_kfree(g, num_tpc_gpc); - return err; -} - void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) { } -int gr_gv100_init_sm_id_table(struct gk20a *g) -{ - unsigned long tpc; - u32 gpc, sm, pes, gtpc; - u32 sm_id = 0; - u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); - struct gr_gk20a *gr = &g->gr; - u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config); - int perf, maxperf; - int err = 0; - unsigned long *gpc_tpc_mask; - u32 *tpc_table, *gpc_table; - - gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * - sizeof(u32)); - tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * - sizeof(u32)); - gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) * - nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); - - if ((gpc_table == NULL) || - (tpc_table == NULL) || - (gpc_tpc_mask == NULL)) { - nvgpu_err(g, "Error allocating memory for sm tables"); - err = -ENOMEM; - goto exit_build_table; - } - - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { - for (pes = 0; - pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc); - pes++) { - gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask( - g->gr.config, gpc, pes); - } - } - - for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) { - maxperf = -1; - for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { - for_each_set_bit(tpc, &gpc_tpc_mask[gpc], - nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { - perf = -1; - err = gr_gv100_scg_estimate_perf(g, - gpc_tpc_mask, gpc, tpc, &perf); - - if (err != 0) { - nvgpu_err(g, - "Error while estimating perf"); - goto exit_build_table; - } - - if (perf >= maxperf) { - maxperf = perf; - gpc_table[gtpc] = gpc; - tpc_table[gtpc] = tpc; - } - } - } - gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc])); - } - - for (tpc = 0, sm_id = 0; 
sm_id < num_sm; tpc++, sm_id += sm_per_tpc) { - for (sm = 0; sm < sm_per_tpc; sm++) { - u32 index = sm_id + sm; - - g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc]; - g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc]; - g->gr.sm_to_cluster[index].sm_index = sm; - g->gr.sm_to_cluster[index].global_tpc_index = tpc; - nvgpu_log_info(g, - "gpc : %d tpc %d sm_index %d global_index: %d", - g->gr.sm_to_cluster[index].gpc_index, - g->gr.sm_to_cluster[index].tpc_index, - g->gr.sm_to_cluster[index].sm_index, - g->gr.sm_to_cluster[index].global_tpc_index); - - } - } - - g->gr.no_of_sm = num_sm; - nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); -exit_build_table: - nvgpu_kfree(g, gpc_table); - nvgpu_kfree(g, tpc_table); - nvgpu_kfree(g, gpc_tpc_mask); - return err; -} - u32 gr_gv100_get_patch_slots(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index 23d389576..e46a01955 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -1,7 +1,7 @@ /* * GV100 GPU GR * - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,7 +26,6 @@ #define NVGPU_GR_GV100_H void gr_gv100_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); -int gr_gv100_init_sm_id_table(struct gk20a *g); void gr_gv100_program_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid); int gr_gv100_load_smid_config(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 58c21886a..0477ed747 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -57,6 +57,7 @@ #include "hal/fifo/fifo_intr_gv11b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/config/gr_config_gm20b.h" +#include "hal/gr/config/gr_config_gv100.h" #include "hal/gr/zbc/zbc_gp10b.h" #include "hal/gr/zbc/zbc_gv11b.h" #include "hal/gr/init/gr_init_gm20b.h" @@ -421,7 +422,6 @@ static const struct gpu_ops gv100_ops = { .suspend_contexts = gr_gp10b_suspend_contexts, .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - .init_sm_id_table = gr_gv100_init_sm_id_table, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -582,6 +582,7 @@ static const struct gpu_ops gv100_ops = { .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, .get_pd_dist_skip_table_size = gm20b_gr_config_get_pd_dist_skip_table_size, + .init_sm_id_table = gv100_gr_config_init_sm_id_table, }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 5329b0b80..7df76508b 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -30,6 +30,7 @@ #include "hal/bus/bus_gm20b.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" +#include 
"hal/gr/config/gr_config_gv100.h" #include "hal/power_features/cg/gv11b_gating_reglist.h" #include "hal/cbc/cbc_gm20b.h" #include "hal/cbc/cbc_gp10b.h" @@ -372,7 +373,6 @@ static const struct gpu_ops gv11b_ops = { .suspend_contexts = gr_gp10b_suspend_contexts, .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - .init_sm_id_table = gr_gv100_init_sm_id_table, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -542,6 +542,7 @@ static const struct gpu_ops gv11b_ops = { .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, .get_pd_dist_skip_table_size = gm20b_gr_config_get_pd_dist_skip_table_size, + .init_sm_id_table = gv100_gr_config_init_sm_id_table, }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c index 2e3d823cb..f5dd8a0a2 100644 --- a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.c @@ -28,6 +28,30 @@ #include +int gm20b_gr_config_init_sm_id_table(struct gk20a *g) +{ + u32 gpc, tpc; + u32 sm_id = 0; + + for (tpc = 0; + tpc < nvgpu_gr_config_get_max_tpc_per_gpc_count(g->gr.config); + tpc++) { + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(g->gr.config); gpc++) { + + if (tpc < nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { + g->gr.sm_to_cluster[sm_id].tpc_index = tpc; + g->gr.sm_to_cluster[sm_id].gpc_index = gpc; + g->gr.sm_to_cluster[sm_id].sm_index = 0; + g->gr.sm_to_cluster[sm_id].global_tpc_index = + sm_id; + sm_id++; + } + } + } + g->gr.no_of_sm = sm_id; + return 0; +} + u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, u32 gpc_index) { diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h index ac7fd798f..9b7ce53cd 100644 --- 
a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gm20b.h @@ -28,6 +28,7 @@ struct gk20a; struct nvgpu_gr_config; +int gm20b_gr_config_init_sm_id_table(struct gk20a *g); u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config, u32 gpc_index); u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c new file mode 100644 index 000000000..048b1301b --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "gr_config_gv100.h" + +/* + * Estimate performance if the given logical TPC in the given logical GPC were + * removed. 
+ */ +static int gr_gv100_scg_estimate_perf(struct gk20a *g, + unsigned long *gpc_tpc_mask, + u32 disable_gpc_id, u32 disable_tpc_id, + int *perf) +{ + struct gr_gk20a *gr = &g->gr; + int err = 0; + u32 scale_factor = 512U; /* Use fx23.9 */ + u32 pix_scale = 1024U*1024U; /* Pix perf in [29:20] */ + u32 world_scale = 1024U; /* World performance in [19:10] */ + u32 tpc_scale = 1U; /* TPC balancing in [9:0] */ + u32 scg_num_pes = 0U; + u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */ + u32 average_tpcs = 0U; /* Average of # of TPCs per GPC */ + u32 deviation; /* absolute diff between TPC# and + * average_tpcs, averaged across GPCs + */ + u32 norm_tpc_deviation; /* deviation/max_tpc_per_gpc */ + u32 tpc_balance; + u32 scg_gpc_pix_perf; + u32 scg_world_perf; + u32 gpc_id; + u32 pes_id; + int diff; + bool is_tpc_removed_gpc = false; + bool is_tpc_removed_pes = false; + u32 max_tpc_gpc = 0U; + u32 num_tpc_mask; + u32 *num_tpc_gpc = nvgpu_kzalloc(g, sizeof(u32) * + nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); + + if (num_tpc_gpc == NULL) { + return -ENOMEM; + } + + /* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */ + for (gpc_id = 0; + gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id++) { + num_tpc_mask = gpc_tpc_mask[gpc_id]; + + if ((gpc_id == disable_gpc_id) && + ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { + /* Safety check if a TPC is removed twice */ + if (is_tpc_removed_gpc) { + err = -EINVAL; + goto free_resources; + } + /* Remove logical TPC from set */ + num_tpc_mask &= ~(BIT32(disable_tpc_id)); + is_tpc_removed_gpc = true; + } + + /* track balancing of tpcs across gpcs */ + num_tpc_gpc[gpc_id] = hweight32(num_tpc_mask); + average_tpcs += num_tpc_gpc[gpc_id]; + + /* save the maximum number of tpcs per gpc */ + max_tpc_gpc = num_tpc_gpc[gpc_id] > max_tpc_gpc ? 
+ num_tpc_gpc[gpc_id] : max_tpc_gpc; + + /* + * Calculate ratio between TPC count and post-FS and post-SCG + * + * ratio represents relative throughput of the GPC + */ + scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc_id] / + nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_id); + + if (min_scg_gpc_pix_perf > scg_gpc_pix_perf) { + min_scg_gpc_pix_perf = scg_gpc_pix_perf; + } + + /* Calculate # of surviving PES */ + for (pes_id = 0; + pes_id < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc_id); + pes_id++) { + /* Count the number of TPC on the set */ + num_tpc_mask = nvgpu_gr_config_get_pes_tpc_mask( + gr->config, gpc_id, pes_id) & + gpc_tpc_mask[gpc_id]; + + if ((gpc_id == disable_gpc_id) && + ((num_tpc_mask & BIT32(disable_tpc_id)) != 0U)) { + + if (is_tpc_removed_pes) { + err = -EINVAL; + goto free_resources; + } + num_tpc_mask &= ~(BIT32(disable_tpc_id)); + is_tpc_removed_pes = true; + } + if (hweight32(num_tpc_mask) != 0UL) { + scg_num_pes++; + } + } + } + + if (!is_tpc_removed_gpc || !is_tpc_removed_pes) { + err = -EINVAL; + goto free_resources; + } + + if (max_tpc_gpc == 0U) { + *perf = 0; + goto free_resources; + } + + /* Now calculate perf */ + scg_world_perf = (scale_factor * scg_num_pes) / + nvgpu_gr_config_get_ppc_count(gr->config); + deviation = 0; + average_tpcs = scale_factor * average_tpcs / + nvgpu_gr_config_get_gpc_count(gr->config); + for (gpc_id =0; + gpc_id < nvgpu_gr_config_get_gpc_count(gr->config); + gpc_id++) { + diff = average_tpcs - scale_factor * num_tpc_gpc[gpc_id]; + if (diff < 0) { + diff = -diff; + } + deviation += U32(diff); + } + + deviation /= nvgpu_gr_config_get_gpc_count(gr->config); + + norm_tpc_deviation = deviation / max_tpc_gpc; + + tpc_balance = scale_factor - norm_tpc_deviation; + + if ((tpc_balance > scale_factor) || + (scg_world_perf > scale_factor) || + (min_scg_gpc_pix_perf > scale_factor) || + (norm_tpc_deviation > scale_factor)) { + err = -EINVAL; + goto free_resources; + } + + *perf = (pix_scale * 
min_scg_gpc_pix_perf) + + (world_scale * scg_world_perf) + + (tpc_scale * tpc_balance); +free_resources: + nvgpu_kfree(g, num_tpc_gpc); + return err; +} + +int gv100_gr_config_init_sm_id_table(struct gk20a *g) +{ + unsigned long tpc; + u32 gpc, sm, pes, gtpc; + u32 sm_id = 0; + u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); + struct gr_gk20a *gr = &g->gr; + u32 num_sm = sm_per_tpc * nvgpu_gr_config_get_tpc_count(gr->config); + int perf, maxperf; + int err = 0; + unsigned long *gpc_tpc_mask; + u32 *tpc_table, *gpc_table; + + gpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * + sizeof(u32)); + tpc_table = nvgpu_kzalloc(g, nvgpu_gr_config_get_tpc_count(gr->config) * + sizeof(u32)); + gpc_tpc_mask = nvgpu_kzalloc(g, sizeof(unsigned long) * + nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS)); + + if ((gpc_table == NULL) || + (tpc_table == NULL) || + (gpc_tpc_mask == NULL)) { + nvgpu_err(g, "Error allocating memory for sm tables"); + err = -ENOMEM; + goto exit_build_table; + } + + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for (pes = 0; + pes < nvgpu_gr_config_get_gpc_ppc_count(g->gr.config, gpc); + pes++) { + gpc_tpc_mask[gpc] |= nvgpu_gr_config_get_pes_tpc_mask( + g->gr.config, gpc, pes); + } + } + + for (gtpc = 0; gtpc < nvgpu_gr_config_get_tpc_count(gr->config); gtpc++) { + maxperf = -1; + for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) { + for_each_set_bit(tpc, &gpc_tpc_mask[gpc], + nvgpu_gr_config_get_gpc_tpc_count(g->gr.config, gpc)) { + perf = -1; + err = gr_gv100_scg_estimate_perf(g, + gpc_tpc_mask, gpc, tpc, &perf); + + if (err != 0) { + nvgpu_err(g, + "Error while estimating perf"); + goto exit_build_table; + } + + if (perf >= maxperf) { + maxperf = perf; + gpc_table[gtpc] = gpc; + tpc_table[gtpc] = tpc; + } + } + } + gpc_tpc_mask[gpc_table[gtpc]] &= ~(BIT64(tpc_table[gtpc])); + } + + for (tpc = 0, sm_id = 0; sm_id < num_sm; tpc++, sm_id += sm_per_tpc) { + for (sm = 0; 
sm < sm_per_tpc; sm++) { + u32 index = sm_id + sm; + + g->gr.sm_to_cluster[index].gpc_index = gpc_table[tpc]; + g->gr.sm_to_cluster[index].tpc_index = tpc_table[tpc]; + g->gr.sm_to_cluster[index].sm_index = sm; + g->gr.sm_to_cluster[index].global_tpc_index = tpc; + nvgpu_log_info(g, + "gpc : %d tpc %d sm_index %d global_index: %d", + g->gr.sm_to_cluster[index].gpc_index, + g->gr.sm_to_cluster[index].tpc_index, + g->gr.sm_to_cluster[index].sm_index, + g->gr.sm_to_cluster[index].global_tpc_index); + + } + } + + g->gr.no_of_sm = num_sm; + nvgpu_log_info(g, " total number of sm = %d", g->gr.no_of_sm); +exit_build_table: + nvgpu_kfree(g, gpc_table); + nvgpu_kfree(g, tpc_table); + nvgpu_kfree(g, gpc_tpc_mask); + return err; +} diff --git a/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h new file mode 100644 index 000000000..2bb77e30f --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/config/gr_config_gv100.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_GR_CONFIG_GV100_H +#define NVGPU_GR_CONFIG_GV100_H + +#include + +struct gk20a; + +int gv100_gr_config_init_sm_id_table(struct gk20a *g); + +#endif /* NVGPU_GR_CONFIG_GV100_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 57832604d..0bd901469 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -415,7 +415,6 @@ struct gpu_ops { u32 graphics_preempt_mode, u32 compute_preempt_mode); int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); - int (*init_sm_id_table)(struct gk20a *g); int (*init_sw_veid_bundle)(struct gk20a *g); int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va); int (*trigger_suspend)(struct gk20a *g); @@ -582,6 +581,7 @@ struct gpu_ops { struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index); u32 (*get_pd_dist_skip_table_size)(void); + int (*init_sm_id_table)(struct gk20a *g); } config; #ifdef CONFIG_GK20A_CTXSW_TRACE diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index a7206b761..0012e69ab 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -61,6 +61,7 @@ #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/fecs_trace/fecs_trace_gv11b.h" #include "hal/gr/config/gr_config_gm20b.h" +#include "hal/gr/config/gr_config_gv100.h" #include "hal/gr/zbc/zbc_gp10b.h" #include "hal/gr/zbc/zbc_gv11b.h" #include "hal/gr/zcull/zcull_gm20b.h" @@ -441,7 +442,6 @@ static const struct gpu_ops tu104_ops = { .suspend_contexts = gr_gp10b_suspend_contexts, .resume_contexts = gr_gk20a_resume_contexts, .get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags, - 
.init_sm_id_table = gr_gv100_init_sm_id_table, .commit_inst = gr_gv11b_commit_inst, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -610,6 +610,7 @@ static const struct gpu_ops tu104_ops = { .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask, .get_pd_dist_skip_table_size = gm20b_gr_config_get_pd_dist_skip_table_size, + .init_sm_id_table = gv100_gr_config_init_sm_id_table, }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = {