gpu: nvgpu: add sw method for SET_BES_CROP_DEBUG4

Added sw method support for SET_BES_CROP_DEBUG4.
In this sw method:
CLAMP_FP_BLEND_TO_MAXVAL forces overflow and
CLAMP_FP_BLEND_TO_INF blend results to clamp to FP maxval.

Added support for this sw method in gp10b/gp106/gv11b
and gv100.

Bug 2046636

Change-Id: I3a9e97587aca76718f7f504ea3b853f87409092a
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1641529
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
seshendra Gadagottu
2018-01-18 11:02:08 -08:00
committed by mobile promotions
parent f6d898656a
commit 193a2ed38c
15 changed files with 111 additions and 2 deletions

View File

@@ -204,6 +204,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.init_preemption_state = gr_gp10b_init_preemption_state,
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.create_gr_sysfs = gr_gp10b_create_sysfs,
.set_ctxsw_preemption_mode =
vgpu_gr_gp10b_set_ctxsw_preemption_mode,

View File

@@ -222,6 +222,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.init_preemption_state = NULL,
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.create_gr_sysfs = gr_gv11b_create_sysfs,
.set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,

View File

@@ -205,6 +205,7 @@ struct gpu_ops {
u32 data);
void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
void (*set_bes_crop_debug3)(struct gk20a *g, u32 data);
void (*set_bes_crop_debug4)(struct gk20a *g, u32 data);
void (*enable_hww_exceptions)(struct gk20a *g);
bool (*is_valid_class)(struct gk20a *g, u32 class_num);
bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num);

View File

@@ -110,6 +110,9 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
case NVC097_SET_BES_CROP_DEBUG3:
g->ops.gr.set_bes_crop_debug3(g, data);
break;
case NVC097_SET_BES_CROP_DEBUG4:
g->ops.gr.set_bes_crop_debug4(g, data);
break;
default:
goto fail;
}

View File

@@ -368,6 +368,7 @@ static const struct gpu_ops gp106_ops = {
.init_preemption_state = NULL,
.update_boosted_ctx = NULL,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.create_gr_sysfs = NULL,
.set_ctxsw_preemption_mode = gr_gp106_set_ctxsw_preemption_mode,
.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode

View File

@@ -695,6 +695,30 @@ void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data)
gk20a_writel(g, gr_bes_crop_debug3_r(), val);
}
/*
 * Handle the SET_BES_CROP_DEBUG4 sw method by updating the CLAMP_FP_BLEND
 * field of the BES CROP DEBUG4 register.
 *
 * @g:    GPU instance whose register is read and written.
 * @data: method payload.  Bit 0 set
 *        (NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL) selects the
 *        TO_MAXVAL field value; a payload equal to
 *        NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF (0x0) selects the
 *        TO_INF field value.  Any other payload is rejected with a warning
 *        and the register is not written.
 */
void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data)
{
	u32 val;

	nvgpu_log_fn(g, " ");

	val = gk20a_readl(g, gr_bes_crop_debug4_r());

	if (data & NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL) {
		val = set_field(val,
			gr_bes_crop_debug4_clamp_fp_blend_m(),
			gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f());
	} else if (data == NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF) {
		/*
		 * TO_INF is defined as 0x0, so it has to be matched by
		 * equality: a bitwise AND with zero is always false, which
		 * would make this branch unreachable.
		 */
		val = set_field(val,
			gr_bes_crop_debug4_clamp_fp_blend_m(),
			gr_bes_crop_debug4_clamp_fp_blend_to_inf_f());
	} else {
		nvgpu_warn(g,
			"gr_gp10b_set_bes_crop_debug4: wrong data sent!");
		return;
	}

	gk20a_writel(g, gr_bes_crop_debug4_r(), val);
}
int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data)
{
@@ -736,6 +760,9 @@ int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
case NVC097_SET_BES_CROP_DEBUG3:
g->ops.gr.set_bes_crop_debug3(g, data);
break;
case NVC097_SET_BES_CROP_DEBUG4:
g->ops.gr.set_bes_crop_debug4(g, data);
break;
default:
goto fail;
}

View File

@@ -49,9 +49,13 @@ enum {
/*
 * Method offsets matched by the sw method handlers.  The NVC097 / NVC0C0
 * prefix presumably identifies the class the method belongs to (TODO
 * confirm against the class headers).
 */
#define NVC097_SET_CIRCULAR_BUFFER_SIZE 0x1280
#define NVC097_SET_SHADER_EXCEPTIONS 0x1528
#define NVC097_SET_BES_CROP_DEBUG3 0x10c4
#define NVC097_SET_BES_CROP_DEBUG4 0x10b0
#define NVC0C0_SET_SHADER_EXCEPTIONS 0x1528
#define NVC0C0_SET_RD_COALESCE 0x0228
/*
 * Payload values for the NVC097_SET_BES_CROP_DEBUG4 method.
 * NOTE(review): TO_INF is 0x0, so an expression of the form
 * (data & ..._TO_INF) is always zero; this value can only be detected by
 * comparing the payload for equality, not by masking.
 */
#define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_INF 0x0
#define NVC097_BES_CROP_DEBUG4_CLAMP_FP_BLEND_TO_MAXVAL 0x1
int gr_gp10b_init_fs_state(struct gk20a *g);
int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem);
@@ -85,6 +89,7 @@ int gr_gp10b_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
u32 gr_gp10b_pagepool_default_size(struct gk20a *g);
int gr_gp10b_calc_global_ctx_buffer_size(struct gk20a *g);
void gr_gp10b_set_bes_crop_debug3(struct gk20a *g, u32 data);
void gr_gp10b_set_bes_crop_debug4(struct gk20a *g, u32 data);
int gr_gp10b_handle_sw_method(struct gk20a *g, u32 addr,
u32 class_num, u32 offset, u32 data);
void gr_gp10b_cb_size_default(struct gk20a *g);

View File

@@ -400,6 +400,7 @@ static const struct gpu_ops gv100_ops = {
.init_preemption_state = NULL,
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.create_gr_sysfs = gr_gv11b_create_sysfs,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,

View File

@@ -1193,6 +1193,9 @@ int gr_gv11b_handle_sw_method(struct gk20a *g, u32 addr,
case NVC397_SET_BES_CROP_DEBUG3:
g->ops.gr.set_bes_crop_debug3(g, data);
break;
case NVC397_SET_BES_CROP_DEBUG4:
g->ops.gr.set_bes_crop_debug4(g, data);
break;
default:
goto fail;
}

View File

@@ -65,6 +65,7 @@ enum {
/*
 * NVC397 method offsets.  SET_BES_CROP_DEBUG3 and SET_BES_CROP_DEBUG4 are
 * the case labels dispatched by gr_gv11b_handle_sw_method().
 */
#define NVC397_SET_TEX_IN_DBG 0x10bc
#define NVC397_SET_SKEDCHECK 0x10c0
#define NVC397_SET_BES_CROP_DEBUG3 0x10c4
#define NVC397_SET_BES_CROP_DEBUG4 0x10b0
/* Presumably bit flags carried in the SET_TEX_IN_DBG payload (TODO confirm). */
#define NVC397_SET_TEX_IN_DBG_TSL1_RVCH_INVALIDATE 0x1
#define NVC397_SET_TEX_IN_DBG_SM_L1TAG_CTRL_CACHE_SURFACE_LD 0x2

View File

@@ -367,6 +367,7 @@ static const struct gpu_ops gv11b_ops = {
.init_preemption_state = gr_gv11b_init_preemption_state,
.update_boosted_ctx = gr_gp10b_update_boosted_ctx,
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_bes_crop_debug4 = gr_gp10b_set_bes_crop_debug4,
.create_gr_sysfs = gr_gv11b_create_sysfs,
.set_ctxsw_preemption_mode = gr_gv11b_set_ctxsw_preemption_mode,
.is_etpc_addr = gv11b_gr_pri_is_etpc_addr,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -3792,6 +3792,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void)
{
return 0x4U;
}
/* Offset constant for the gr_bes_crop_debug4 register. */
static inline u32 gr_bes_crop_debug4_r(void)
{
	const u32 reg_offset = 0x0040894cU;

	return reg_offset;
}

/* Mask of the clamp_fp_blend field: a single bit at position 18. */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void)
{
	const u32 field_shift = 18U;

	return 0x1U << field_shift;
}

/* clamp_fp_blend field value for "to_inf" (field bit clear). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void)
{
	const u32 field_value = 0x0U;

	return field_value;
}

/* clamp_fp_blend field value for "to_maxval" (field bit 18 set). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void)
{
	const u32 field_value = 0x40000U;

	return field_value;
}
static inline u32 gr_bes_crop_settings_r(void)
{
return 0x00408958U;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -3996,6 +3996,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void)
{
return 0x4U;
}
/* Offset constant for the gr_bes_crop_debug4 register. */
static inline u32 gr_bes_crop_debug4_r(void)
{
	const u32 reg_offset = 0x0040894cU;

	return reg_offset;
}

/* Mask of the clamp_fp_blend field: a single bit at position 18. */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void)
{
	const u32 field_shift = 18U;

	return 0x1U << field_shift;
}

/* clamp_fp_blend field value for "to_inf" (field bit clear). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void)
{
	const u32 field_value = 0x0U;

	return field_value;
}

/* clamp_fp_blend field value for "to_maxval" (field bit 18 set). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void)
{
	const u32 field_value = 0x40000U;

	return field_value;
}
static inline u32 gr_bes_crop_settings_r(void)
{
return 0x00408958U;

View File

@@ -3732,6 +3732,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void)
{
return 0x4U;
}
/* Offset constant for the gr_bes_crop_debug4 register. */
static inline u32 gr_bes_crop_debug4_r(void)
{
	const u32 reg_offset = 0x0040894cU;

	return reg_offset;
}

/* Mask of the clamp_fp_blend field: a single bit at position 18. */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void)
{
	const u32 field_shift = 18U;

	return 0x1U << field_shift;
}

/* clamp_fp_blend field value for "to_inf" (field bit clear). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void)
{
	const u32 field_value = 0x0U;

	return field_value;
}

/* clamp_fp_blend field value for "to_maxval" (field bit 18 set). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void)
{
	const u32 field_value = 0x40000U;

	return field_value;
}
static inline u32 gr_bes_crop_settings_r(void)
{
return 0x00408958U;

View File

@@ -4416,6 +4416,22 @@ static inline u32 gr_bes_crop_debug3_blendopt_fill_override_enabled_f(void)
{
return 0x4U;
}
/* Offset constant for the gr_bes_crop_debug4 register. */
static inline u32 gr_bes_crop_debug4_r(void)
{
	const u32 reg_offset = 0x0040894cU;

	return reg_offset;
}

/* Mask of the clamp_fp_blend field: a single bit at position 18. */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_m(void)
{
	const u32 field_shift = 18U;

	return 0x1U << field_shift;
}

/* clamp_fp_blend field value for "to_inf" (field bit clear). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_inf_f(void)
{
	const u32 field_value = 0x0U;

	return field_value;
}

/* clamp_fp_blend field value for "to_maxval" (field bit 18 set). */
static inline u32 gr_bes_crop_debug4_clamp_fp_blend_to_maxval_f(void)
{
	const u32 field_value = 0x40000U;

	return field_value;
}
static inline u32 gr_bes_crop_settings_r(void)
{
return 0x00408958U;