gpu: nvgpu: create common.fbp unit

Create a new unit, common.fbp, which initializes fbp support and provides
APIs to retrieve fbp data.

Create a private header with the following data:
struct nvgpu_fbp {
        u32 num_fbps;
        u32 max_fbps_count;
        u32 fbp_en_mask;
        u32 *fbp_rop_l2_en_mask;
};

Expose the following public APIs to initialize/remove fbp support:
nvgpu_fbp_init_support()
nvgpu_fbp_remove_support()
vgpu_fbp_init_support() for vGPU

Expose the following APIs to retrieve fbp data:
nvgpu_fbp_get_num_fbps()
nvgpu_fbp_get_max_fbps_count()
nvgpu_fbp_get_fbp_en_mask()
nvgpu_fbp_get_rop_l2_en_mask()

Use the above APIs to retrieve fbp data throughout the code.

Remove the corresponding fields from struct nvgpu_gr since they are no
longer referenced through that structure.

Jira NVGPU-3124

Change-Id: I027caf4874b1f6154219f01902020dec4d7b0cb1
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2108617
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-04-26 17:51:46 +05:30
committed by mobile promotions
parent 3af5242bb0
commit d2512bd5ee
31 changed files with 328 additions and 140 deletions

View File

@@ -53,6 +53,7 @@ nvgpu-y += \
common/boardobj/boardobjgrp_e32.o \
common/regops/regops.o \
common/ltc/ltc.o \
common/fbp/fbp.o \
common/cbc/cbc.o \
common/gr/gr.o \
common/gr/gr_intr.o \
@@ -557,6 +558,7 @@ nvgpu-y += \
nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
common/vgpu/ltc/ltc_vgpu.o \
common/vgpu/cbc/cbc_vgpu.o \
common/vgpu/fbp/fbp_vgpu.o \
common/vgpu/gr/gr_vgpu.o \
common/vgpu/gr/ctx_vgpu.o \
common/vgpu/gr/subctx_vgpu.o \

View File

@@ -89,6 +89,7 @@ srcs += common/sim/sim.c \
common/rbtree.c \
common/ltc/ltc.c \
common/cbc/cbc.c \
common/fbp/fbp.c \
common/io/io.c \
common/ecc.c \
common/ce/ce.c \
@@ -482,6 +483,7 @@ srcs += common/vgpu/init/init_vgpu.c \
common/vgpu/debugger_vgpu.c \
common/vgpu/ltc/ltc_vgpu.c \
common/vgpu/cbc/cbc_vgpu.c \
common/vgpu/fbp/fbp_vgpu.c \
common/vgpu/ce_vgpu.c \
common/vgpu/gv11b/vgpu_gv11b.c \
common/vgpu/gv11b/vgpu_hal_gv11b.c \

View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/fbp.h>
#include "fbp_priv.h"
/*
 * Initialize the common.fbp unit and publish it as g->fbp.
 *
 * Allocates a struct nvgpu_fbp and fills it with:
 *  - num_fbps:           value returned by g->ops.priv_ring.get_fbp_count()
 *  - max_fbps_count:     value returned by g->ops.top.get_max_fbps_count()
 *  - fbp_en_mask:        inverted FBP fuse status, limited to the low
 *                        max_fbps_count bits
 *  - fbp_rop_l2_en_mask: array of max_fbps_count entries; the entry of each
 *                        FBP set in fbp_en_mask holds that FBP's ROP_L2 mask
 *
 * The call does nothing when g->fbp is already set.
 *
 * Returns 0 on success (or when already initialized) and -ENOMEM when either
 * allocation fails. g->fbp is only assigned on success.
 */
int nvgpu_fbp_init_support(struct gk20a *g)
{
	struct nvgpu_fbp *fbp;
	u32 max_ltc_per_fbp;
	u32 rop_l2_all_en;
	u32 fbp_en_mask;
	unsigned long i;
	unsigned long fbp_en_mask_tmp;
	u32 tmp;

	/* Already initialized: keep the existing state. */
	if (g->fbp != NULL) {
		return 0;
	}

	fbp = nvgpu_kzalloc(g, sizeof(*fbp));
	if (fbp == NULL) {
		return -ENOMEM;
	}

	fbp->num_fbps = g->ops.priv_ring.get_fbp_count(g);
	fbp->max_fbps_count = g->ops.top.get_max_fbps_count(g);

	/* Both counts are u32, so they are printed with %u rather than %d. */
	nvgpu_log_info(g, "fbps: %u", fbp->num_fbps);
	nvgpu_log_info(g, "max_fbps_count: %u", fbp->max_fbps_count);

	/*
	 * Read active fbp mask from fuse
	 * Note that 0:enable and 1:disable in value read from fuse so we've to
	 * flip the bits.
	 * Also set unused bits to zero
	 *
	 * NOTE(review): BIT32(n) presumably shifts a 32-bit value, so this
	 * assumes max_fbps_count < 32 - confirm.
	 */
	fbp_en_mask = g->ops.fuse.fuse_status_opt_fbp(g);
	fbp_en_mask = ~fbp_en_mask;
	fbp_en_mask = fbp_en_mask & (BIT32(fbp->max_fbps_count) - 1U);
	fbp->fbp_en_mask = fbp_en_mask;

	fbp->fbp_rop_l2_en_mask =
		nvgpu_kzalloc(g, fbp->max_fbps_count * sizeof(u32));
	if (fbp->fbp_rop_l2_en_mask == NULL) {
		nvgpu_kfree(g, fbp);
		return -ENOMEM;
	}

	/*
	 * for_each_set_bit() is handed a pointer to an unsigned long, hence
	 * the widened copy of the u32 mask.
	 */
	fbp_en_mask_tmp = fbp_en_mask;
	max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
	rop_l2_all_en = BIT32(max_ltc_per_fbp) - 1U;

	/*
	 * mask of Rop_L2 for each FBP
	 *
	 * Only indices set in fbp_en_mask are written; the fuse value is
	 * XOR-ed with the all-enabled pattern before being stored. The
	 * remaining entries keep whatever nvgpu_kzalloc() returned
	 * (presumably zero).
	 */
	for_each_set_bit(i, &fbp_en_mask_tmp, fbp->max_fbps_count) {
		tmp = g->ops.fuse.fuse_status_opt_rop_l2_fbp(g, i);
		fbp->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
	}

	g->fbp = fbp;

	return 0;
}
/*
 * Tear down the common.fbp unit.
 *
 * Frees the per-FBP ROP_L2 mask array and the nvgpu_fbp object referenced by
 * g->fbp, then resets g->fbp to NULL. Calling it when g->fbp is already NULL
 * does nothing.
 */
void nvgpu_fbp_remove_support(struct gk20a *g)
{
	struct nvgpu_fbp *stale = g->fbp;

	if (stale == NULL) {
		/* Nothing was allocated and g->fbp is already NULL. */
		return;
	}

	nvgpu_kfree(g, stale->fbp_rop_l2_en_mask);
	nvgpu_kfree(g, stale);

	g->fbp = NULL;
}
/*
 * Return the num_fbps value stored in @fbp.
 * @fbp is dereferenced without a NULL check.
 */
u32 nvgpu_fbp_get_num_fbps(struct nvgpu_fbp *fbp)
{
	return fbp->num_fbps;
}
/*
 * Return the max_fbps_count value stored in @fbp.
 * @fbp is dereferenced without a NULL check.
 */
u32 nvgpu_fbp_get_max_fbps_count(struct nvgpu_fbp *fbp)
{
	return fbp->max_fbps_count;
}
/*
 * Return the FBP enable mask stored in @fbp.
 * @fbp is dereferenced without a NULL check.
 */
u32 nvgpu_fbp_get_fbp_en_mask(struct nvgpu_fbp *fbp)
{
	return fbp->fbp_en_mask;
}
/*
 * Return the per-FBP ROP_L2 enable mask array stored in @fbp.
 *
 * The returned pointer is the array held by @fbp itself, not a copy.
 * @fbp is dereferenced without a NULL check.
 */
u32 *nvgpu_fbp_get_rop_l2_en_mask(struct nvgpu_fbp *fbp)
{
	return fbp->fbp_rop_l2_en_mask;
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FBP_PRIV_H
#define NVGPU_FBP_PRIV_H
/*
 * Private state of the common.fbp unit.
 *
 * Filled in by nvgpu_fbp_init_support() on native GPUs and by
 * vgpu_fbp_init_support() on vGPU; released by nvgpu_fbp_remove_support().
 */
struct nvgpu_fbp {
	/* FBP count reported at init time. */
	u32 num_fbps;
	/* Maximum FBP count; also the number of entries in fbp_rop_l2_en_mask. */
	u32 max_fbps_count;
	/* Bitmask of enabled FBPs. */
	u32 fbp_en_mask;
	/* Array with one ROP_L2 enable mask per FBP index. */
	u32 *fbp_rop_l2_en_mask;
};
#endif /* NVGPU_FBP_PRIV_H */

View File

@@ -261,9 +261,6 @@ static void gr_remove_support(struct gk20a *g)
nvgpu_gr_config_deinit(g, gr->config);
nvgpu_kfree(g, gr->fbp_rop_l2_en_mask);
gr->fbp_rop_l2_en_mask = NULL;
nvgpu_netlist_deinit_ctx_vars(g);
nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map);
@@ -322,25 +319,6 @@ static int gr_init_config(struct gk20a *g, struct nvgpu_gr *gr)
return -ENOMEM;
}
gr->num_fbps = g->ops.priv_ring.get_fbp_count(g);
gr->max_fbps_count = g->ops.top.get_max_fbps_count(g);
gr->fbp_en_mask = g->ops.gr.init.get_fbp_en_mask(g);
if (gr->fbp_rop_l2_en_mask == NULL) {
gr->fbp_rop_l2_en_mask =
nvgpu_kzalloc(g, gr->max_fbps_count * sizeof(u32));
if (gr->fbp_rop_l2_en_mask == NULL) {
goto clean_up;
}
} else {
(void) memset(gr->fbp_rop_l2_en_mask, 0, gr->max_fbps_count *
sizeof(u32));
}
nvgpu_log_info(g, "fbps: %d", gr->num_fbps);
nvgpu_log_info(g, "max_fbps_count: %d", gr->max_fbps_count);
nvgpu_log_info(g, "bundle_cb_default_size: %d",
g->ops.gr.init.get_bundle_cb_default_size(g));
nvgpu_log_info(g, "min_gpm_fifo_depth: %d",
@@ -359,9 +337,6 @@ static int gr_init_config(struct gk20a *g, struct nvgpu_gr *gr)
nvgpu_gr_config_get_tpc_count(gr->config)));
return 0;
clean_up:
return -ENOMEM;
}
static int nvgpu_gr_init_ctx_state(struct gk20a *g)

View File

@@ -41,9 +41,6 @@ struct nvgpu_gr {
struct nvgpu_cond init_wq;
bool initialized;
u32 num_fbps;
u32 max_fbps_count;
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer;
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
@@ -69,9 +66,6 @@ struct nvgpu_gr {
u32 cilp_preempt_pending_chid;
u32 fbp_en_mask;
u32 *fbp_rop_l2_en_mask;
struct nvgpu_mutex ctxsw_disable_mutex;
int ctxsw_disable_count;
};

View File

@@ -26,6 +26,7 @@
#include <nvgpu/sort.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bsearch.h>
#include <nvgpu/fbp.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/hwpm_map.h>
@@ -380,6 +381,7 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
u32 offset = 0;
int ret;
u32 active_fbpa_mask;
u32 num_fbps = nvgpu_fbp_get_num_fbps(g->fbp);
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
@@ -438,7 +440,7 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, g->gr->num_fbps, ~U32(0U),
hwpm_ctxsw_reg_count_max, 0, num_fbps, ~U32(0U),
g->ops.perf.get_pmm_per_chiplet_offset(),
~U32(0U)) != 0) {
goto cleanup;
@@ -448,7 +450,7 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_fbp_router_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
g->gr->num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE,
num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE,
~U32(0U)) != 0) {
goto cleanup;
}

View File

@@ -36,6 +36,7 @@
#include <nvgpu/ltc.h>
#include <nvgpu/cbc.h>
#include <nvgpu/ecc.h>
#include <nvgpu/fbp.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/mm.h>
#include <nvgpu/soc.h>
@@ -371,6 +372,13 @@ int gk20a_finalize_poweron(struct gk20a *g)
goto done;
}
err = nvgpu_fbp_init_support(g);
if (err != 0) {
nvgpu_err(g, "failed to init gk20a fbp");
nvgpu_mutex_release(&g->tpc_pg_lock);
goto done;
}
err = nvgpu_gr_init_support(g);
if (err != 0) {
nvgpu_err(g, "failed to init gk20a gr");
@@ -570,8 +578,6 @@ void gk20a_init_gpu_characteristics(struct gk20a *g)
if (g->ops.gr.init_cyclestats != NULL) {
g->ops.gr.init_cyclestats(g);
}
g->ops.gr.get_rop_l2_en_mask(g);
}
static struct gk20a *gk20a_from_refcount(struct nvgpu_ref *refcount)

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/vgpu/vgpu.h>
#include <nvgpu/vgpu/tegra_vgpu.h>
#include <nvgpu/gk20a.h>
#include "fbp_vgpu.h"
#include "common/fbp/fbp_priv.h"
/*
 * vGPU variant of FBP initialization.
 *
 * Builds a struct nvgpu_fbp from the vGPU private constants instead of
 * querying hardware: num_fbps and max_fbps_count are both taken from
 * constants.num_fbps, fbp_en_mask from constants.fbp_en_mask, and the
 * ROP_L2 mask array is copied entry by entry from constants.l2_en_mask.
 *
 * Does nothing when g->fbp is already set. Returns 0 on success (or when
 * already initialized) and -ENOMEM when either allocation fails.
 */
int vgpu_fbp_init_support(struct gk20a *g)
{
	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
	struct nvgpu_fbp *vfbp;
	u32 fbp_count;
	u32 idx;

	if (g->fbp != NULL) {
		return 0;
	}

	vfbp = nvgpu_kzalloc(g, sizeof(*vfbp));
	if (vfbp == NULL) {
		return -ENOMEM;
	}

	/* The same constant serves as both the active and the maximum count. */
	fbp_count = priv->constants.num_fbps;
	vfbp->num_fbps = fbp_count;
	vfbp->max_fbps_count = fbp_count;
	vfbp->fbp_en_mask = priv->constants.fbp_en_mask;

	vfbp->fbp_rop_l2_en_mask = nvgpu_kzalloc(g, fbp_count * sizeof(u32));
	if (vfbp->fbp_rop_l2_en_mask == NULL) {
		nvgpu_kfree(g, vfbp);
		return -ENOMEM;
	}

	/* Copy one ROP_L2 enable mask per FBP out of the vGPU constants. */
	idx = 0U;
	while (idx < fbp_count) {
		vfbp->fbp_rop_l2_en_mask[idx] = priv->constants.l2_en_mask[idx];
		idx++;
	}

	g->fbp = vfbp;

	return 0;
}

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FBP_VGPU_H
#define NVGPU_FBP_VGPU_H
struct gk20a;
/*
 * Initialize FBP support for a virtual GPU and store the result in g->fbp.
 * The FBP data is filled from the vGPU private constants.
 * Returns 0 on success or if g->fbp is already set, -ENOMEM on allocation
 * failure.
 */
int vgpu_fbp_init_support(struct gk20a *g);
#endif /* NVGPU_FBP_VGPU_H */

View File

@@ -138,7 +138,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.init_cyclestats = vgpu_gr_init_cyclestats,
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
@@ -307,7 +306,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
#endif /* CONFIG_GK20A_CTXSW_TRACE */
.init = {
.get_no_of_sm = nvgpu_gr_get_no_of_sm,
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.fs_state = vgpu_gr_init_fs_state,
.get_bundle_cb_default_size =
gm20b_gr_init_get_bundle_cb_default_size,

View File

@@ -556,15 +556,6 @@ u32 vgpu_gr_get_max_fbps_count(struct gk20a *g)
return priv->constants.num_fbps;
}
u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
nvgpu_log_fn(g, " ");
return priv->constants.fbp_en_mask;
}
u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -583,29 +574,6 @@ u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g)
return priv->constants.max_lts_per_ltc;
}
u32 *vgpu_gr_rop_l2_en_mask(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
u32 i, max_fbps_count = priv->constants.num_fbps;
nvgpu_log_fn(g, " ");
if (g->gr->fbp_rop_l2_en_mask == NULL) {
g->gr->fbp_rop_l2_en_mask =
nvgpu_kzalloc(g, max_fbps_count * sizeof(u32));
if (!g->gr->fbp_rop_l2_en_mask) {
return NULL;
}
}
g->gr->max_fbps_count = max_fbps_count;
for (i = 0; i < max_fbps_count; i++) {
g->gr->fbp_rop_l2_en_mask[i] = priv->constants.l2_en_mask[i];
}
return g->gr->fbp_rop_l2_en_mask;
}
int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *zbc_val)
{
@@ -694,9 +662,6 @@ static void vgpu_remove_gr_support(struct gk20a *g)
nvgpu_gr_config_deinit(gr->g, gr->config);
nvgpu_gr_zcull_deinit(gr->g, gr->zcull);
nvgpu_kfree(gr->g, gr->fbp_rop_l2_en_mask);
gr->fbp_rop_l2_en_mask = NULL;
}
static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)

View File

@@ -57,10 +57,8 @@ int vgpu_gr_get_zcull_info(struct gk20a *g,
u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
u32 gpc_index);
u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);
u32 vgpu_gr_get_fbp_en_mask(struct gk20a *g);
u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g);
u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g);
u32 *vgpu_gr_rop_l2_en_mask(struct gk20a *g);
int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *zbc_val);
int vgpu_gr_query_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc,

View File

@@ -164,7 +164,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = NULL,
.update_pc_sampling = vgpu_gr_update_pc_sampling,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.init_cyclestats = vgpu_gr_init_cyclestats,
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
@@ -359,7 +358,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_no_of_sm = nvgpu_gr_get_no_of_sm,
.get_nonpes_aware_tpc =
gv11b_gr_init_get_nonpes_aware_tpc,
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.fs_state = vgpu_gr_init_fs_state,
.get_bundle_cb_default_size =
gv11b_gr_init_get_bundle_cb_default_size,

View File

@@ -32,6 +32,7 @@
#include <nvgpu/string.h>
#include <nvgpu/ltc.h>
#include <nvgpu/cbc.h>
#include <nvgpu/fbp.h>
#include <nvgpu/cyclestats_snapshot.h>
#include "init_vgpu.h"
@@ -40,6 +41,7 @@
#include "common/vgpu/fifo/fifo_vgpu.h"
#include "common/vgpu/mm/mm_vgpu.h"
#include "common/vgpu/gr/gr_vgpu.h"
#include "common/vgpu/fbp/fbp_vgpu.h"
#include "common/vgpu/ivc/comm_vgpu.h"
#include "common/gr/gr_priv.h"
@@ -86,6 +88,8 @@ void vgpu_remove_support_common(struct gk20a *g)
nvgpu_free_cyclestats_snapshot_data(g);
#endif
nvgpu_fbp_remove_support(g);
msg.event = TEGRA_VGPU_EVENT_ABORT;
err = vgpu_ivc_send(vgpu_ivc_get_peer_self(), TEGRA_VGPU_QUEUE_INTR,
&msg, sizeof(msg));
@@ -174,6 +178,12 @@ int vgpu_finalize_poweron_common(struct gk20a *g)
return err;
}
err = vgpu_fbp_init_support(g);
if (err != 0) {
nvgpu_err(g, "failed to init gk20a fbp");
return err;
}
err = vgpu_init_gr_support(g);
if (err != 0) {
nvgpu_err(g, "failed to init gk20a gr");

View File

@@ -464,28 +464,6 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
return 0;
}
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
{
struct nvgpu_gr *gr = g->gr;
unsigned long i;
u32 tmp, max_fbps_count, max_ltc_per_fbp;
unsigned long fbp_en_mask;
u32 rop_l2_all_en;
max_fbps_count = g->ops.top.get_max_fbps_count(g);
max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
rop_l2_all_en = BIT32(max_ltc_per_fbp) - 1U;
fbp_en_mask = g->ops.gr.init.get_fbp_en_mask(g);
/* mask of Rop_L2 for each FBP */
for_each_set_bit(i, &fbp_en_mask, max_fbps_count) {
tmp = g->ops.fuse.fuse_status_opt_rop_l2_fbp(g, i);
gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
}
return gr->fbp_rop_l2_en_mask;
}
void gr_gm20b_init_cyclestats(struct gk20a *g)
{
#if defined(CONFIG_GK20A_CYCLE_STATS)

View File

@@ -53,7 +53,6 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
bool enable);
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g);
void gr_gm20b_init_cyclestats(struct gk20a *g);
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state);
int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc,

View File

@@ -29,6 +29,7 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/fbp.h>
#include "gr_gk20a.h"
#include "gr_pri_gk20a.h"
@@ -164,7 +165,7 @@ void gr_gv100_init_hwpm_pmm_register(struct gk20a *g)
g->ops.gr.set_pmm_register(g, perf_pmmsys_engine_sel_r(0),
0xFFFFFFFFU, 1U, num_sys_perfmon);
g->ops.gr.set_pmm_register(g, perf_pmmfbp_engine_sel_r(0),
0xFFFFFFFFU, g->gr->num_fbps, num_fbp_perfmon);
0xFFFFFFFFU, nvgpu_fbp_get_num_fbps(g->fbp), num_fbp_perfmon);
g->ops.gr.set_pmm_register(g, perf_pmmgpc_engine_sel_r(0),
0xFFFFFFFFU, nvgpu_gr_config_get_gpc_count(g->gr->config),
num_gpc_perfmon);

View File

@@ -41,6 +41,7 @@
#include <nvgpu/channel.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/fbp.h>
#include "gr_pri_gk20a.h"
#include "gr_pri_gv11b.h"
@@ -2651,7 +2652,7 @@ static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g,
u32 fbp_num = 0;
u32 base = 0;
for (fbp_num = 0; fbp_num < g->gr->num_fbps; fbp_num++) {
for (fbp_num = 0; fbp_num < nvgpu_fbp_get_num_fbps(g->fbp); fbp_num++) {
base = perf_pmmfbp_base_v() +
(fbp_num * g->ops.perf.get_pmm_per_chiplet_offset());

View File

@@ -43,26 +43,6 @@
#define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
#define FECS_CTXSW_RESET_DELAY_US 10U
u32 gm20b_gr_init_get_fbp_en_mask(struct gk20a *g)
{
u32 fbp_en_mask;
u32 max_fbps_count;
max_fbps_count = g->ops.top.get_max_fbps_count(g);
/*
* Read active fbp mask from fuse
* Note that 0:enable and 1:disable in value read from fuse so we've to
* flip the bits.
* Also set unused bits to zero
*/
fbp_en_mask = g->ops.fuse.fuse_status_opt_fbp(g);
fbp_en_mask = ~fbp_en_mask;
fbp_en_mask = fbp_en_mask & (BIT32(max_fbps_count) - 1U);
return fbp_en_mask;
}
void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data)
{
u32 val;

View File

@@ -35,7 +35,6 @@ struct nvgpu_gr_config;
struct netlist_av_list;
struct nvgpu_gr_config;
u32 gm20b_gr_init_get_fbp_en_mask(struct gk20a *g);
void gm20b_gr_init_lg_coalesce(struct gk20a *g, u32 data);
void gm20b_gr_init_su_coalesce(struct gk20a *g, u32 data);
void gm20b_gr_init_pes_vsc_stream(struct gk20a *g);

View File

@@ -263,7 +263,6 @@ static const struct gpu_ops gm20b_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = gr_gm20b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
@@ -429,7 +428,6 @@ static const struct gpu_ops gm20b_ops = {
.get_no_of_sm = nvgpu_gr_get_no_of_sm,
.wait_initialized = nvgpu_gr_wait_initialized,
.ecc_scrub_reg = NULL,
.get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,

View File

@@ -291,7 +291,6 @@ static const struct gpu_ops gp10b_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = gr_gp10b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
@@ -476,7 +475,6 @@ static const struct gpu_ops gp10b_ops = {
.get_no_of_sm = nvgpu_gr_get_no_of_sm,
.wait_initialized = nvgpu_gr_wait_initialized,
.ecc_scrub_reg = NULL,
.get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,

View File

@@ -399,7 +399,6 @@ static const struct gpu_ops gv100_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
@@ -612,7 +611,6 @@ static const struct gpu_ops gv100_ops = {
gv11b_gr_init_get_nonpes_aware_tpc,
.wait_initialized = nvgpu_gr_wait_initialized,
.ecc_scrub_reg = NULL,
.get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,

View File

@@ -367,7 +367,6 @@ static const struct gpu_ops gv11b_ops = {
.powergate_tpc = gr_gv11b_powergate_tpc,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
@@ -585,7 +584,6 @@ static const struct gpu_ops gv11b_ops = {
gv11b_gr_init_get_nonpes_aware_tpc,
.wait_initialized = nvgpu_gr_wait_initialized,
.ecc_scrub_reg = gv11b_gr_init_ecc_scrub_reg,
.get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,

View File

@@ -423,7 +423,6 @@ static const struct gpu_ops tu104_ops = {
.get_tpc_num = gr_gm20b_get_tpc_num,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
.update_pc_sampling = gr_gm20b_update_pc_sampling,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_tu104_init_sm_dsm_reg_info,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
@@ -642,7 +641,6 @@ static const struct gpu_ops tu104_ops = {
gv11b_gr_init_get_nonpes_aware_tpc,
.wait_initialized = nvgpu_gr_wait_initialized,
.ecc_scrub_reg = NULL,
.get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,
.lg_coalesce = gm20b_gr_init_lg_coalesce,
.su_coalesce = gm20b_gr_init_su_coalesce,
.pes_vsc_stream = gm20b_gr_init_pes_vsc_stream,

View File

@@ -28,6 +28,7 @@
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/ltc.h>
#include <nvgpu/fbp.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>
@@ -178,7 +179,7 @@ int gm20b_determine_L2_size_bytes(struct gk20a *g)
sets = 0U;
}
active_ltcs = g->gr->num_fbps;
active_ltcs = nvgpu_fbp_get_num_fbps(g->fbp);
/* chip-specific values */
lts_per_ltc = 2U;

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FBP_H
#define NVGPU_FBP_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_fbp;
/*
 * Allocate and populate the FBP state and store it in g->fbp.
 * Returns 0 on success or if g->fbp is already set, -ENOMEM on allocation
 * failure.
 */
int nvgpu_fbp_init_support(struct gk20a *g);
/*
 * Free the FBP state referenced by g->fbp (if any) and reset g->fbp to NULL.
 */
void nvgpu_fbp_remove_support(struct gk20a *g);
/* Return the num_fbps value held in @fbp; @fbp must not be NULL. */
u32 nvgpu_fbp_get_num_fbps(struct nvgpu_fbp *fbp);
/* Return the max_fbps_count value held in @fbp; @fbp must not be NULL. */
u32 nvgpu_fbp_get_max_fbps_count(struct nvgpu_fbp *fbp);
/* Return the FBP enable mask held in @fbp; @fbp must not be NULL. */
u32 nvgpu_fbp_get_fbp_en_mask(struct nvgpu_fbp *fbp);
/*
 * Return the per-FBP ROP_L2 enable mask array held in @fbp. The pointer
 * refers to the array owned by @fbp (max_fbps_count entries), not a copy.
 */
u32 *nvgpu_fbp_get_rop_l2_en_mask(struct nvgpu_fbp *fbp);
#endif

View File

@@ -28,6 +28,7 @@ struct gk20a;
struct fifo_gk20a;
struct channel_gk20a;
struct nvgpu_gr;
struct nvgpu_fbp;
struct sim_nvgpu;
struct nvgpu_ce_app;
struct gk20a_ctxsw_trace;
@@ -327,7 +328,6 @@ struct gpu_ops {
struct gk20a_debug_output *o);
int (*update_pc_sampling)(struct channel_gk20a *ch,
bool enable);
u32* (*get_rop_l2_en_mask)(struct gk20a *g);
void (*init_sm_dsm_reg_info)(void);
void (*init_ovr_sm_dsm_perf)(void);
void (*init_cyclestats)(struct gk20a *g);
@@ -685,7 +685,6 @@ struct gpu_ops {
void (*wait_initialized)(struct gk20a *g);
void (*ecc_scrub_reg)(struct gk20a *g,
struct nvgpu_gr_config *gr_config);
u32 (*get_fbp_en_mask)(struct gk20a *g);
void (*lg_coalesce)(struct gk20a *g, u32 data);
void (*su_coalesce)(struct gk20a *g, u32 data);
void (*pes_vsc_stream)(struct gk20a *g);
@@ -1967,6 +1966,7 @@ struct gk20a {
struct fifo_gk20a fifo;
struct nvgpu_nvlink_dev nvlink;
struct nvgpu_gr *gr;
struct nvgpu_fbp *fbp;
struct sim_nvgpu *sim;
struct mm_gk20a mm;
struct nvgpu_pmu pmu;

View File

@@ -30,6 +30,7 @@
#include <nvgpu/enabled.h>
#include <nvgpu/sizes.h>
#include <nvgpu/list.h>
#include <nvgpu/fbp.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>
@@ -352,8 +353,8 @@ gk20a_ctrl_ioctl_gpu_characteristics(
gpu.gpu_va_bit_count = 40;
strlcpy(gpu.chipname, g->name, sizeof(gpu.chipname));
gpu.max_fbps_count = g->ops.top.get_max_fbps_count(g);
gpu.fbp_en_mask = g->ops.gr.init.get_fbp_en_mask(g);
gpu.max_fbps_count = nvgpu_fbp_get_max_fbps_count(g->fbp);
gpu.fbp_en_mask = nvgpu_fbp_get_fbp_en_mask(g->fbp);;
gpu.max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
gpu.max_lts_per_ltc = g->ops.top.get_max_lts_per_ltc(g);
gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw;
@@ -592,9 +593,10 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
static int gk20a_ctrl_get_fbp_l2_masks(
struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
{
struct nvgpu_gr *gr = g->gr;
int err = 0;
const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
const u32 fbp_l2_mask_size = sizeof(u32) *
nvgpu_fbp_get_max_fbps_count(g->fbp);
u32 *fbp_rop_l2_en_mask = nvgpu_fbp_get_rop_l2_en_mask(g->fbp);
if (args->mask_buf_size > 0) {
size_t write_size = fbp_l2_mask_size;
@@ -605,7 +607,7 @@ static int gk20a_ctrl_get_fbp_l2_masks(
err = copy_to_user((void __user *)(uintptr_t)
args->mask_buf_addr,
gr->fbp_rop_l2_en_mask, write_size);
fbp_rop_l2_en_mask, write_size);
}
if (err == 0)

View File

@@ -40,6 +40,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/soc.h>
#include <nvgpu/fbp.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/vidmem.h>
@@ -782,6 +783,8 @@ void gk20a_remove_support(struct gk20a *g)
nvgpu_free_cyclestats_snapshot_data(g);
#endif
nvgpu_fbp_remove_support(g);
nvgpu_remove_usermode_support(g);
nvgpu_free_enabled_flags(g);