gpu: nvgpu: set graphics specific PRI values for graphics contexts

Add new HAL gops.gr.init.set_default_gfx_regs() to set graphics specific
PRI values for graphics contexts in function nvgpu_gr_obj_ctx_alloc().

Add new HAL gops.gr.init.capture_gfx_regs() to capture and save init
values for the PRIs. Add new struct nvgpu_gr_obj_ctx_gfx_regs to hold the
PRI init values.

Define HAL functions gv11b_gr_init_set_default_gfx_regs() and
gv11b_gr_init_capture_gfx_regs(). Set the HAL functions for
gv11b and ga10b.

Register accessors required to set PRIs are auto-generated.

Bug 3506078

Change-Id: I4c2843a274f3c924e402541e600e104ed0c9ed1c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2671598
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Shashank Singh <shashsingh@nvidia.com>
Reviewed-by: Jonathan Mccaffrey <jmccaffrey@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Deepak Nibade
2022-02-17 18:26:34 +05:30
committed by mobile promotions
parent e4cf52123f
commit a1ef716f9d
9 changed files with 179 additions and 3 deletions

View File

@@ -755,6 +755,16 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
goto clean_up;
}
/*
* Read and save register init values that need to be configured
* differently for graphics contexts.
* Updated values are written to the context in
* gops.gr.init.set_default_gfx_regs().
*/
if (g->ops.gr.init.capture_gfx_regs != NULL) {
g->ops.gr.init.capture_gfx_regs(g, &golden_image->gfx_regs);
}
golden_image->ready = true;
#ifdef CONFIG_NVGPU_POWER_PG
nvgpu_pmu_set_golden_image_initialized(g, GOLDEN_IMG_READY);
@@ -897,6 +907,16 @@ int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
}
#endif
/*
* Register init values are saved in
* gops.gr.init.capture_gfx_regs(). Update and set the values as
* required for graphics contexts.
*/
if (g->ops.gpu_class.is_valid_gfx(class_num) &&
g->ops.gr.init.set_default_gfx_regs != NULL) {
g->ops.gr.init.set_default_gfx_regs(g, gr_ctx, &golden_image->gfx_regs);
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
out:

View File

@@ -28,6 +28,19 @@
struct nvgpu_gr_global_ctx_local_golden_image;
/**
* Graphics specific context register values structure.
*
* This structure stores init values for some of the registers that need to be
* configured differently for Graphics contexts.
*
* The values are saved through gops.gr.init.capture_gfx_regs() and are later
* passed to gops.gr.init.set_default_gfx_regs(), which updates selected fields
* and writes the results for graphics contexts.
*/
struct nvgpu_gr_obj_ctx_gfx_regs {
/** Init value of the SM dispatch control register (gv11b capture reads gr_gpcs_tpcs_sm_disp_ctrl_r()). */
u32 reg_sm_disp_ctrl;
/** Init value of the GPCS setup debug register (gv11b capture reads gr_pri_gpcs_setup_debug_r()). */
u32 reg_gpcs_setup_debug;
/** Init value of the TEX LOD debug register (gv11b capture reads gr_pri_gpcs_tpcs_tex_lod_dbg_r()). */
u32 reg_tex_lod_dbg;
/** Init value of the SM HWW warp ESR report mask (gv11b capture reads gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r()). */
u32 reg_hww_warp_esr_report_mask;
};
/**
* Golden context image descriptor structure.
*
@@ -54,6 +67,11 @@ struct nvgpu_gr_obj_ctx_golden_image {
*/
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
/**
* Init values for graphics specific registers.
*/
struct nvgpu_gr_obj_ctx_gfx_regs gfx_regs;
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
/**
* Pointer to local Golden context image struct used for Golden

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -35,6 +35,7 @@ struct gk20a;
struct nvgpu_gr_config;
struct nvgpu_gr_ctx;
struct netlist_av_list;
struct nvgpu_gr_obj_ctx_gfx_regs;
u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc,
struct nvgpu_gr_config *gr_config);
@@ -87,6 +88,10 @@ u32 gv11b_gr_init_get_patch_slots(struct gk20a *g,
struct nvgpu_gr_config *config);
void gv11b_gr_init_detect_sm_arch(struct gk20a *g);
/*
* Read the current values of the registers that are configured differently
* for graphics contexts and store them in gfx_regs.
*/
void gv11b_gr_init_capture_gfx_regs(struct gk20a *g, struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs);
/*
* Update the saved register values in gfx_regs with the graphics specific
* field settings and write them to gr_ctx through the patch write interface.
*/
void gv11b_gr_init_set_default_gfx_regs(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs);
#ifndef CONFIG_NVGPU_NON_FUSA
void gv11b_gr_init_set_default_compute_regs(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx);

View File

@@ -37,6 +37,8 @@
#include "gr_init_gm20b.h"
#include "gr_init_gv11b.h"
#include "common/gr/obj_ctx_priv.h"
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
@@ -939,6 +941,61 @@ void gv11b_gr_init_detect_sm_arch(struct gk20a *g)
gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}
void gv11b_gr_init_capture_gfx_regs(struct gk20a *g, struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs)
{
gfx_regs->reg_sm_disp_ctrl =
nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
gfx_regs->reg_gpcs_setup_debug =
nvgpu_readl(g, gr_pri_gpcs_setup_debug_r());
gfx_regs->reg_tex_lod_dbg =
nvgpu_readl(g, gr_pri_gpcs_tpcs_tex_lod_dbg_r());
gfx_regs->reg_hww_warp_esr_report_mask =
nvgpu_readl(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r());
}
/*
 * Write graphics specific register values for gr_ctx via the patch write
 * interface.
 *
 * Every value starts from the init value saved in gfx_regs; only the
 * fields named below are overridden. All writes are issued between
 * nvgpu_gr_ctx_patch_write_begin() and nvgpu_gr_ctx_patch_write_end().
 */
void gv11b_gr_init_set_default_gfx_regs(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
		struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs)
{
	u32 disp_ctrl;
	u32 setup_debug;
	u32 tex_lod_dbg;
	u32 esr_report_mask;

	nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);

	/* SM dispatch control: killed_ld_is_nop field set to "disable". */
	disp_ctrl = set_field(gfx_regs->reg_sm_disp_ctrl,
			gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_m(),
			gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_disable_f());
	nvgpu_gr_ctx_patch_write(g, gr_ctx,
			gr_gpcs_tpcs_sm_disp_ctrl_r(), disp_ctrl, true);

	/* Setup debug: poly_offset_nan_is_zero field set to "enable". */
	setup_debug = set_field(gfx_regs->reg_gpcs_setup_debug,
			gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_m(),
			gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_enable_f());
	nvgpu_gr_ctx_patch_write(g, gr_ctx,
			gr_pri_gpcs_setup_debug_r(), setup_debug, true);

	/* TEX LOD debug: cubeseam_aniso field set to "enable". */
	tex_lod_dbg = set_field(gfx_regs->reg_tex_lod_dbg,
			gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_m(),
			gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_enable_f());
	nvgpu_gr_ctx_patch_write(g, gr_ctx,
			gr_pri_gpcs_tpcs_tex_lod_dbg_r(), tex_lod_dbg, true);

	/*
	 * HWW warp ESR report mask: switch the oor_addr, misaligned_addr,
	 * invalid_const_addr_ldc and tex_format fields to "no_report",
	 * one field at a time on top of the saved init value.
	 */
	esr_report_mask = gfx_regs->reg_hww_warp_esr_report_mask;
	esr_report_mask = set_field(esr_report_mask,
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_m(),
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_no_report_f());
	esr_report_mask = set_field(esr_report_mask,
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_m(),
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_no_report_f());
	esr_report_mask = set_field(esr_report_mask,
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_m(),
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_no_report_f());
	esr_report_mask = set_field(esr_report_mask,
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_m(),
			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_no_report_f());
	nvgpu_gr_ctx_patch_write(g, gr_ctx,
			gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r(),
			esr_report_mask, true);

	nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
}
#ifndef CONFIG_NVGPU_NON_FUSA
void gv11b_gr_init_set_default_compute_regs(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)

View File

@@ -645,6 +645,8 @@ static const struct gops_gr_init ga10b_ops_gr_init = {
.get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
.get_patch_slots = gv11b_gr_init_get_patch_slots,
.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
.capture_gfx_regs = gv11b_gr_init_capture_gfx_regs,
.set_default_gfx_regs = gv11b_gr_init_set_default_gfx_regs,
#ifndef CONFIG_NVGPU_NON_FUSA
.set_default_compute_regs = ga10b_gr_init_set_default_compute_regs,
#endif

View File

@@ -536,6 +536,8 @@ static const struct gops_gr_init gv11b_ops_gr_init = {
.get_max_subctx_count = gv11b_gr_init_get_max_subctx_count,
.get_patch_slots = gv11b_gr_init_get_patch_slots,
.detect_sm_arch = gv11b_gr_init_detect_sm_arch,
.capture_gfx_regs = gv11b_gr_init_capture_gfx_regs,
.set_default_gfx_regs = gv11b_gr_init_set_default_gfx_regs,
#ifndef CONFIG_NVGPU_NON_FUSA
.set_default_compute_regs = gv11b_gr_init_set_default_compute_regs,
#endif

View File

@@ -47,6 +47,7 @@ struct netlist_av_list;
struct nvgpu_hw_err_inject_info_desc;
struct nvgpu_gr_sm_ecc_status;
struct nvgpu_gr_zbc_table_indices;
struct nvgpu_gr_obj_ctx_gfx_regs;
enum nvgpu_gr_sm_ecc_error_types;
@@ -692,6 +693,31 @@ struct gops_gr_init {
void (*set_default_compute_regs)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
#endif
/**
* @brief Capture graphics specific register values.
*
* @param g [in] Pointer to GPU driver struct.
* @param gfx_regs [out] Pointer to struct that receives the gfx specific register init values.
*
* This function captures values of some registers that need to be
* configured differently only for graphics context. The captured values
* are stored in \a gfx_regs and are later passed to set_default_gfx_regs().
*/
void (*capture_gfx_regs)(struct gk20a *g, struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs);
/**
* @brief Set graphics specific register values.
*
* @param g [in] Pointer to GPU driver struct.
* @param gr_ctx [in] Pointer to GR engine context image.
* @param gfx_regs [in] Pointer to struct holding gfx specific register init values,
*                      as saved by capture_gfx_regs().
*
* This function sets graphics specific register values in the
* patch context so that register values are set only for graphics
* contexts. The caller visible in nvgpu_gr_obj_ctx_alloc() invokes it only
* when the class is a valid graphics class and this HAL is not NULL.
*/
void (*set_default_gfx_regs)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_obj_ctx_gfx_regs *gfx_regs);
/**
* @brief Get supported preemption mode flags.
*

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -149,6 +149,12 @@
#define gr_activity_4_gpc0_v(r) (((r) >> 0U) & 0x7U)
#define gr_activity_4_gpc0_empty_v() (0x00000000U)
#define gr_activity_4_gpc0_preempted_v() (0x00000004U)
#define gr_pri_gpcs_setup_debug_r() (0x00418800U)
#define gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_m() (U32(0x1U) << 0U)
#define gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_enable_f() (0x1U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_r() (0x00419a04U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_m() (U32(0x1U) << 1U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_enable_f() (0x2U)
#define gr_pri_sked_activity_r() (0x00407054U)
#define gr_pri_gpc0_gpccs_gpc_activity0_r() (0x00502c80U)
#define gr_pri_gpc0_gpccs_gpc_activity1_r() (0x00502c84U)
@@ -850,17 +856,32 @@
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_param_report_f()\
(0x800U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_reg_report_f() (0x2000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_m()\
(U32(0x1U) << 14U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_report_f() (0x4000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_no_report_f() (0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_m()\
(U32(0x1U) << 15U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_report_f()\
(0x8000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_no_report_f()\
(0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f()\
(0x10000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_m()\
(U32(0x1U) << 18U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f()\
(0x40000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_no_report_f()\
(0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f()\
(0x800000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_m()\
(U32(0x1U) << 24U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_report_f()\
(0x1000000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_no_report_f()\
(0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_layout_report_f()\
(0x2000000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f()\
@@ -1041,6 +1062,8 @@
#define gr_gpcs_tpcs_sm_disp_ctrl_r() (0x00419ba4U)
#define gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m() (U32(0x3U) << 11U)
#define gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f() (0x1000U)
#define gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_m() (U32(0x1U) << 3U)
#define gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_disable_f() (0x0U)
#define gr_debug_0_r() (0x00400080U)
#define gr_debug_0_scg_force_slow_drain_tpc_m() (U32(0x1U) << 11U)
#define gr_debug_0_scg_force_slow_drain_tpc_enabled_f() (0x800U)

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -169,6 +169,12 @@
#define gr_activity_4_gpc0_v(r) (((r) >> 0U) & 0x7U)
#define gr_activity_4_gpc0_empty_v() (0x00000000U)
#define gr_activity_4_gpc0_preempted_v() (0x00000004U)
#define gr_pri_gpcs_setup_debug_r() (0x00418800U)
#define gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_m() (U32(0x1U) << 0U)
#define gr_pri_gpcs_setup_debug_poly_offset_nan_is_zero_enable_f() (0x1U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_r() (0x00419a04U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_m() (U32(0x1U) << 1U)
#define gr_pri_gpcs_tpcs_tex_lod_dbg_cubeseam_aniso_enable_f() (0x2U)
#define gr_pri_gpc0_gcc_dbg_r() (0x00501000U)
#define gr_pri_gpcs_gcc_dbg_r() (0x00419000U)
#define gr_pri_gpcs_gcc_dbg_invalidate_m() (U32(0x1U) << 1U)
@@ -1308,19 +1314,34 @@
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_illegal_instr_param_report_f()\
(0x800U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_reg_report_f() (0x2000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_m()\
(U32(0x1U) << 14U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_report_f() (0x4000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_oor_addr_no_report_f() (0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_m()\
(U32(0x1U) << 15U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_report_f()\
(0x8000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_misaligned_addr_no_report_f()\
(0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_addr_space_report_f()\
(0x10000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_m()\
(U32(0x1U) << 18U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_report_f()\
(0x40000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_invalid_const_addr_ldc_no_report_f()\
(0x0U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_fault_report_f()\
(0x800000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_stack_overflow_report_f()\
(0x400000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_mmu_nack_report_f()\
(0x4000000U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_m()\
(U32(0x1U) << 24U)
#define gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_tex_format_no_report_f()\
(0x0U)
#define gr_gpcs_tpcs_tpccs_tpc_exception_en_r() (0x00419d0cU)
#define gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f() (0x2U)
#define gr_gpcs_tpcs_tpccs_tpc_exception_en_tex_enabled_f() (0x1U)
@@ -1594,6 +1615,8 @@
#define gr_gpcs_tpcs_sm_disp_ctrl_r() (0x00419ba4U)
#define gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_m() (U32(0x3U) << 11U)
#define gr_gpcs_tpcs_sm_disp_ctrl_re_suppress_disable_f() (0x1000U)
#define gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_m() (U32(0x1U) << 3U)
#define gr_gpcs_tpcs_sm_disp_ctrl_killed_ld_is_nop_disable_f() (0x0U)
#define gr_gpcs_tc_debug0_r() (0x00418708U)
#define gr_gpcs_tc_debug0_limit_coalesce_buffer_size_f(v)\
((U32(v) & 0x1ffU) << 0U)