mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: BVEC for struct nvgpu_tsg_sm_error_state fields
Add setter and getter methods for accessing tsg->sm_error_states. The
getter returns a pointer to const struct nvgpu_tsg_sm_error_state, so
BVEC coverage for the struct's fields no longer has to be added in
multiple locations. This design ensures that callers outside the owning
unit (FIFO) only ever obtain a read-only pointer.

The following new methods are added, along with unit tests and BVEC
tests for both:

  nvgpu_tsg_store_sm_error_state
  nvgpu_tsg_get_sm_error_state

Jira NVGPU-6947

Change-Id: I82c22a2774862c8579baa41b6fb8292fa164704a
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit 79574638671a0c6efe41cd3423668fcd1bd96826)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2556938
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Shashank Singh <shashsingh@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
parent 6361653633
commit 791dc18666
committed by mobile promotions
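The substance of the change: all writes to tsg->sm_error_states now funnel through one validated setter, and readers only ever receive a pointer to const. A minimal usage sketch (illustrative only, not part of the diff; tsg, sm_id, err and the esr values are assumed to be in scope):

	/* Reads go through the getter; the returned pointer is const, so
	 * callers outside FIFO cannot mutate the stored state. */
	const struct nvgpu_tsg_sm_error_state *state =
			nvgpu_tsg_get_sm_error_state(tsg, sm_id);
	if (state != NULL) {
		(void)state->hww_global_esr;
	}

	/* Writes go through the setter, which validates sm_id and the
	 * backing allocation before copying the values. */
	err = nvgpu_tsg_store_sm_error_state(tsg, sm_id,
			hww_global_esr, hww_warp_esr, hww_warp_esr_pc,
			hww_global_esr_report_mask, hww_warp_esr_report_mask);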
@@ -1002,6 +1002,53 @@ int nvgpu_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 	return 0;
 }
 
+int nvgpu_tsg_store_sm_error_state(struct nvgpu_tsg *tsg, u32 sm_id,
+		u32 hww_global_esr, u32 hww_warp_esr, u64 hww_warp_esr_pc,
+		u32 hww_global_esr_report_mask, u32 hww_warp_esr_report_mask)
+{
+	struct gk20a *g = tsg->g;
+	u32 num_of_sm = g->ops.gr.init.get_no_of_sm(g);
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
+
+	if (sm_id >= num_of_sm) {
+		nvgpu_err(g, "Invalid number of SMs");
+		return -EINVAL;
+	}
+
+	if (tsg->sm_error_states == NULL) {
+		nvgpu_err(g, "invalid memory");
+		return -ENOMEM;
+	}
+
+	sm_error_states = &tsg->sm_error_states[sm_id];
+
+	sm_error_states->hww_global_esr = hww_global_esr;
+	sm_error_states->hww_warp_esr = hww_warp_esr;
+	sm_error_states->hww_warp_esr_pc = hww_warp_esr_pc;
+	sm_error_states->hww_global_esr_report_mask = hww_global_esr_report_mask;
+	sm_error_states->hww_warp_esr_report_mask = hww_warp_esr_report_mask;
+
+	return 0;
+}
+
+const struct nvgpu_tsg_sm_error_state *nvgpu_tsg_get_sm_error_state(struct nvgpu_tsg *tsg, u32 sm_id)
+{
+	struct gk20a *g = tsg->g;
+	u32 num_of_sm = g->ops.gr.init.get_no_of_sm(g);
+
+	if (sm_id >= num_of_sm) {
+		nvgpu_err(g, "Invalid number of SMs");
+		return NULL;
+	}
+
+	if (tsg->sm_error_states == NULL) {
+		nvgpu_err(g, "Invalid memory");
+		return NULL;
+	}
+
+	return &tsg->sm_error_states[sm_id];
+}
+
 #ifdef CONFIG_NVGPU_DEBUGGER
 int nvgpu_tsg_set_sm_exception_type_mask(struct nvgpu_channel *ch,
 		u32 exception_mask)
@@ -982,7 +982,6 @@ int vgpu_gr_resume_contexts(struct gk20a *g,
 void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
 		struct tegra_vgpu_sm_esr_info *info)
 {
-	struct nvgpu_tsg_sm_error_state *sm_error_states;
 	struct nvgpu_tsg *tsg;
 	u32 no_of_sm = g->ops.gr.init.get_no_of_sm(g);
 
@@ -1004,15 +1003,10 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	sm_error_states = &tsg->sm_error_states[info->sm_id];
-
-	sm_error_states->hww_global_esr = info->hww_global_esr;
-	sm_error_states->hww_warp_esr = info->hww_warp_esr;
-	sm_error_states->hww_warp_esr_pc = info->hww_warp_esr_pc;
-	sm_error_states->hww_global_esr_report_mask =
-		info->hww_global_esr_report_mask;
-	sm_error_states->hww_warp_esr_report_mask =
-		info->hww_warp_esr_report_mask;
+	(void)nvgpu_tsg_store_sm_error_state(tsg, info->sm_id,
+			info->hww_global_esr, info->hww_warp_esr,
+			info->hww_warp_esr_pc, info->hww_global_esr_report_mask,
+			info->hww_warp_esr_report_mask);
 
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 }
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -304,21 +304,24 @@ void gm20b_gr_intr_clear_sm_hww(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 			0);
 }
 
-static void gm20b_gr_intr_read_sm_error_state(struct gk20a *g,
-			u32 offset,
-			struct nvgpu_tsg_sm_error_state *sm_error_states)
+static int gm20b_gr_intr_read_sm_error_state(struct gk20a *g,
+			struct nvgpu_tsg *tsg, u32 offset, u32 sm_id)
 {
-	sm_error_states->hww_global_esr = gk20a_readl(g, nvgpu_safe_add_u32(
+	u32 hww_global_esr = gk20a_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm_hww_global_esr_r(), offset));
-	sm_error_states->hww_warp_esr = gk20a_readl(g, nvgpu_safe_add_u32(
+	u32 hww_warp_esr = gk20a_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm_hww_warp_esr_r(), offset));
-	sm_error_states->hww_warp_esr_pc = (u64)(gk20a_readl(g, nvgpu_safe_add_u32(
+	u64 hww_warp_esr_pc = (u64)(gk20a_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r(), offset)));
-	sm_error_states->hww_global_esr_report_mask = gk20a_readl(g, nvgpu_safe_add_u32(
+	u32 hww_global_esr_report_mask = gk20a_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r(), offset));
-	sm_error_states->hww_warp_esr_report_mask = gk20a_readl(g, nvgpu_safe_add_u32(
+	u32 hww_warp_esr_report_mask = gk20a_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r(), offset));
+
+	return nvgpu_tsg_store_sm_error_state(tsg, sm_id,
+			hww_global_esr, hww_warp_esr,
+			hww_warp_esr_pc, hww_global_esr_report_mask,
+			hww_warp_esr_report_mask);
 }
 
 u32 gm20b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
@@ -329,8 +332,8 @@ u32 gm20b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 s
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 offset;
-	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
 	struct nvgpu_tsg *tsg = NULL;
+	int err = 0;
 
 	offset = nvgpu_safe_add_u32(
 			nvgpu_safe_mult_u32(gpc_stride, gpc),
@@ -353,8 +356,10 @@ u32 gm20b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 s
 		goto record_fail;
 	}
 
-	sm_error_states = tsg->sm_error_states + sm_id;
-	gm20b_gr_intr_read_sm_error_state(g, offset, sm_error_states);
+	err = gm20b_gr_intr_read_sm_error_state(g, tsg, offset, sm_id);
+	if (err != 0) {
+		nvgpu_err(g, "error writing sm_error_state");
+	}
 
 record_fail:
 #ifdef CONFIG_NVGPU_DEBUGGER
@@ -1852,34 +1852,36 @@ void gv11b_gr_intr_handle_ssync_hww(struct gk20a *g, u32 *ssync_esr)
 			gr_ssync_hww_esr_reset_active_f());
 }
 
-static void gv11b_gr_intr_read_sm_error_state(struct gk20a *g,
-			u32 offset,
-			struct nvgpu_tsg_sm_error_state *sm_error_states)
+static int gv11b_gr_intr_read_sm_error_state(struct gk20a *g,
+			struct nvgpu_tsg *tsg, u32 offset, u32 sm_id)
 {
-	u32 addr_hi, addr_lo;
-
-	sm_error_states->hww_global_esr = nvgpu_readl(g, nvgpu_safe_add_u32(
+	u32 hww_global_esr = nvgpu_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm0_hww_global_esr_r(), offset));
 
-	sm_error_states->hww_warp_esr = nvgpu_readl(g, nvgpu_safe_add_u32(
+	u32 hww_warp_esr = nvgpu_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm0_hww_warp_esr_r(), offset));
 
-	addr_hi = nvgpu_readl(g, nvgpu_safe_add_u32(
+	u32 addr_hi = nvgpu_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r(), offset));
-	addr_lo = nvgpu_readl(g, nvgpu_safe_add_u32(
+	u32 addr_lo = nvgpu_readl(g, nvgpu_safe_add_u32(
 			gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r(), offset));
 
-	sm_error_states->hww_warp_esr_pc = hi32_lo32_to_u64(addr_hi, addr_lo);
+	u64 hww_warp_esr_pc = hi32_lo32_to_u64(addr_hi, addr_lo);
 
-	sm_error_states->hww_global_esr_report_mask = nvgpu_readl(g,
+	u32 hww_global_esr_report_mask = nvgpu_readl(g,
 			nvgpu_safe_add_u32(
 				gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r(),
 				offset));
 
-	sm_error_states->hww_warp_esr_report_mask = nvgpu_readl(g,
+	u32 hww_warp_esr_report_mask = nvgpu_readl(g,
 			nvgpu_safe_add_u32(
 				gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r(),
 				offset));
+
+	return nvgpu_tsg_store_sm_error_state(tsg, sm_id,
+			hww_global_esr, hww_warp_esr,
+			hww_warp_esr_pc, hww_global_esr_report_mask,
+			hww_warp_esr_report_mask);
 }
 
 u32 gv11b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
@@ -1888,8 +1890,8 @@ u32 gv11b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 s
 	u32 sm_id;
 	u32 offset, sm_per_tpc, tpc_id;
 	u32 gpc_offset, gpc_tpc_offset;
-	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
 	struct nvgpu_tsg *tsg = NULL;
+	int err = 0;
 
 #ifdef CONFIG_NVGPU_DEBUGGER
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
@@ -1918,8 +1920,10 @@ u32 gv11b_gr_intr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 s
 		goto record_fail;
 	}
 
-	sm_error_states = &tsg->sm_error_states[sm_id];
-	gv11b_gr_intr_read_sm_error_state(g, offset, sm_error_states);
+	err = gv11b_gr_intr_read_sm_error_state(g, tsg, offset, sm_id);
+	if (err != 0) {
+		nvgpu_err(g, "error writing sm_error_state");
+	}
 
 record_fail:
 #ifdef CONFIG_NVGPU_DEBUGGER
@@ -618,6 +618,45 @@ void nvgpu_tsg_set_unserviceable(struct gk20a *g, struct nvgpu_tsg *tsg);
  */
 void nvgpu_tsg_wakeup_wqs(struct gk20a *g, struct nvgpu_tsg *tsg);
 
+/**
+ * @brief Store error info for an SM's error state.
+ *
+ * @param tsg [in]				Pointer to the TSG struct.
+ * @param sm_id [in]				Index of the SM.
+ * @param hww_global_esr [in]			hww_global_esr register value.
+ * @param hww_warp_esr [in]			hww_warp_esr register value.
+ * @param hww_warp_esr_pc [in]			PC value of hww_warp_esr.
+ * @param hww_global_esr_report_mask [in]	hww_global_esr_report_mask value.
+ * @param hww_warp_esr_report_mask [in]		hww_warp_esr_report_mask value.
+ *
+ * Store the given values into the entry of tsg->sm_error_states indexed
+ * by sm_id. The backing memory holds SM error states for all the SMs
+ * supported by h/w.
+ *
+ * @return 0 in case of success, < 0 in case of failure.
+ * @retval -EINVAL if sm_id is not smaller than the number of SMs.
+ * @retval -ENOMEM if no memory is allocated to store SM error states.
+ */
+int nvgpu_tsg_store_sm_error_state(struct nvgpu_tsg *tsg, u32 sm_id,
+		u32 hww_global_esr, u32 hww_warp_esr, u64 hww_warp_esr_pc,
+		u32 hww_global_esr_report_mask, u32 hww_warp_esr_report_mask);
+
+/**
+ * @brief Retrieve a read-only pointer to an SM's error state.
+ *
+ * @param tsg [in]	Pointer to the TSG struct.
+ * @param sm_id [in]	Index of the SM.
+ *
+ * Retrieve a pointer to the struct nvgpu_tsg_sm_error_state entry for
+ * the index sm_id.
+ *
+ * @retval NULL if sm_id is invalid or no memory was allocated for
+ *         storing SM error states.
+ * @retval Pointer to a constant struct nvgpu_tsg_sm_error_state otherwise.
+ */
+const struct nvgpu_tsg_sm_error_state *nvgpu_tsg_get_sm_error_state(
+		struct nvgpu_tsg *tsg, u32 sm_id);
+
 #ifdef CONFIG_NVGPU_DEBUGGER
 int nvgpu_tsg_set_sm_exception_type_mask(struct nvgpu_channel *ch,
 		u32 exception_mask);
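A short sketch of how a caller might act on the return values documented above (hypothetical caller code, assuming the contract in this header; the esr variables are assumed to be in scope):

	int err = nvgpu_tsg_store_sm_error_state(tsg, sm_id,
			hww_global_esr, hww_warp_esr, hww_warp_esr_pc,
			hww_global_esr_report_mask, hww_warp_esr_report_mask);
	if (err == -EINVAL) {
		/* sm_id was outside [0, g->ops.gr.init.get_no_of_sm(g) - 1]. */
	} else if (err == -ENOMEM) {
		/* tsg->sm_error_states was never allocated. */
	}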
@@ -276,7 +276,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 		struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
 {
 	struct gk20a *g = dbg_s->g;
-	struct nvgpu_tsg_sm_error_state *sm_error_state;
+	const struct nvgpu_tsg_sm_error_state *sm_error_state = NULL;
 	struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
 	struct nvgpu_channel *ch;
 	struct nvgpu_tsg *tsg;
@@ -307,7 +307,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 
 	nvgpu_speculation_barrier();
 
-	sm_error_state = tsg->sm_error_states + sm_id;
+	sm_error_state = nvgpu_tsg_get_sm_error_state(tsg, sm_id);
 	sm_error_state_record.hww_global_esr =
 		sm_error_state->hww_global_esr;
 	sm_error_state_record.hww_warp_esr =
@@ -583,7 +583,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
 		struct nvgpu_tsg *tsg,
 		struct nvgpu_tsg_read_single_sm_error_state_args *args)
 {
-	struct nvgpu_tsg_sm_error_state *sm_error_state;
+	const struct nvgpu_tsg_sm_error_state *sm_error_state = NULL;
 	struct nvgpu_tsg_sm_error_state_record sm_error_state_record;
 	u32 sm_id;
 	int err = 0;
@@ -597,7 +597,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
 
 	nvgpu_speculation_barrier();
 
-	sm_error_state = tsg->sm_error_states + sm_id;
+	sm_error_state = nvgpu_tsg_get_sm_error_state(tsg, sm_id);
 	sm_error_state_record.global_esr =
 		sm_error_state->hww_global_esr;
 	sm_error_state_record.warp_esr =
@@ -700,6 +700,8 @@ nvgpu_timeout_expired_fault_injection
 nvgpu_timeout_init
 nvgpu_timeout_peek_expired
 nvgpu_timers_get_fault_injection
+nvgpu_tsg_store_sm_error_state
+nvgpu_tsg_get_sm_error_state
 nvgpu_tsg_abort
 nvgpu_tsg_bind_channel
 nvgpu_tsg_check_and_get_from_id
@@ -851,6 +851,137 @@ done:
 	return ret;
 }
 
+int test_tsg_sm_error_state_set_get(struct unit_module *m,
+		struct gk20a *g, void *args)
+{
+	struct gpu_ops gops = g->ops;
+	struct nvgpu_channel *ch = NULL;
+	struct nvgpu_tsg *tsg = NULL;
+	int ret = UNIT_FAIL;
+	int err = 0;
+	u32 num_sm = g->ops.gr.init.get_no_of_sm(g);
+	u32 valid_sm_id[][2] = {{0, num_sm - 1U}};
+	u32 invalid_sm_id[][2] = {{num_sm, U32_MAX}};
+	u32 i = 0, j = 0, sm_id_range, states, sm_id, t = 0, z = 0;
+	u32 (*working_list)[2];
+	const char *string_states[] = {"Min", "Max", "Mid"};
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
+	const struct nvgpu_tsg_sm_error_state *get_error_state = NULL;
+	u32 sm_error_states_values[] = {0, 0, 0, 0};
+	u64 hww_warp_esr_pc = 0;
+
+	tsg = nvgpu_tsg_open(g, getpid());
+	unit_assert(tsg != NULL, goto done);
+
+	ch = nvgpu_channel_open_new(g, ~0U, false, getpid(), getpid());
+	unit_assert(ch != NULL, goto done);
+
+	err = nvgpu_tsg_bind_channel(tsg, ch);
+	unit_assert(err == 0, goto done);
+
+	sm_error_states = tsg->sm_error_states;
+
+	/* store must fail while tsg->sm_error_states is NULL */
+	tsg->sm_error_states = NULL;
+	err = nvgpu_tsg_store_sm_error_state(tsg, 0, 0, 0, 0, 0, 0);
+	unit_assert(err != 0, goto done);
+
+	tsg->sm_error_states = sm_error_states;
+	err = nvgpu_tsg_store_sm_error_state(tsg, 0, 0, 0, 0, 0, 0);
+	unit_assert(err == 0, goto done);
+
+	/* get must return NULL while tsg->sm_error_states is NULL */
+	tsg->sm_error_states = NULL;
+	get_error_state = nvgpu_tsg_get_sm_error_state(tsg, 0);
+	unit_assert(get_error_state == NULL, goto done);
+	tsg->sm_error_states = sm_error_states;
+
+	/* valid sm_id range first, then the invalid range */
+	for (i = 0; i < 2; i++) {
+		working_list = (i == 0) ? valid_sm_id : invalid_sm_id;
+		sm_id_range = (i == 0) ? ARRAY_SIZE(valid_sm_id) : ARRAY_SIZE(invalid_sm_id);
+		for (j = 0; j < sm_id_range; j++) {
+			for (states = 0; states < 3; states++) {
+				if (states == 0) {
+					sm_id = working_list[j][0];
+				} else if (states == 1) {
+					sm_id = working_list[j][1];
+				} else {
+					if (working_list[j][1] - working_list[j][0] > 1) {
+						sm_id = get_random_u32(working_list[j][0] + 1, working_list[j][1] - 1);
+					} else {
+						continue;
+					}
+				}
+
+				/* invalid sm_id case */
+				if (i == 1) {
+					unit_info(m, "BVEC testing for nvgpu_tsg_store_sm_error_state with sm_id = 0x%08x (Invalid range %s)\n", sm_id, string_states[states]);
+					err = nvgpu_tsg_store_sm_error_state(tsg, sm_id, 0, 0, 0, 0, 0);
+					unit_assert(err != 0, goto done);
+
+					unit_info(m, "BVEC testing for nvgpu_tsg_get_sm_error_state with sm_id = 0x%08x (Invalid range %s)\n", sm_id, string_states[states]);
+					get_error_state = nvgpu_tsg_get_sm_error_state(tsg, sm_id);
+					unit_assert(get_error_state == NULL, goto done);
+				} else {
+					for (t = 0; t < 3; t++) {
+						/* Loop to fill the SM error values */
+						for (z = 0; z < 4; z++) {
+							if (t == 0) {
+								/* Min: reset all values to 0 */
+								sm_error_states_values[z] = 0;
+								hww_warp_esr_pc = 0;
+							} else if (t == 1) {
+								sm_error_states_values[z] = U32_MAX;
+								hww_warp_esr_pc = U32_MAX;
+							} else {
+								sm_error_states_values[z] = get_random_u32(1, U32_MAX - 1);
+								hww_warp_esr_pc = 2ULL * U32_MAX;
+							}
+						}
+
+						unit_info(m, "BVEC testing for nvgpu_tsg_store_sm_error_state with sm_id = 0x%08x (Valid range %s)\n", sm_id, string_states[t]);
+						unit_info(m, "hww_global_esr = 0x%08x\n", sm_error_states_values[0]);
+						unit_info(m, "hww_warp_esr = 0x%08x\n", sm_error_states_values[1]);
+						unit_info(m, "hww_warp_esr_pc = 0x%016llx\n", hww_warp_esr_pc);
+						unit_info(m, "hww_global_esr_report_mask = 0x%08x\n", sm_error_states_values[2]);
+						unit_info(m, "hww_warp_esr_report_mask = 0x%08x\n", sm_error_states_values[3]);
+
+						err = nvgpu_tsg_store_sm_error_state(tsg, sm_id,
+							sm_error_states_values[0], sm_error_states_values[1], hww_warp_esr_pc,
+							sm_error_states_values[2], sm_error_states_values[3]);
+						unit_assert(err == 0, goto done);
+
+						unit_info(m, "BVEC testing for nvgpu_tsg_get_sm_error_state with sm_id = %u (Valid range %s)\n", sm_id, string_states[t]);
+						get_error_state = nvgpu_tsg_get_sm_error_state(tsg, sm_id);
+						unit_assert(get_error_state != NULL, goto done);
+
+						unit_assert(get_error_state->hww_global_esr == sm_error_states_values[0], goto done);
+						unit_assert(get_error_state->hww_warp_esr == sm_error_states_values[1], goto done);
+						unit_assert(get_error_state->hww_warp_esr_pc == hww_warp_esr_pc, goto done);
+						unit_assert(get_error_state->hww_global_esr_report_mask == sm_error_states_values[2], goto done);
+						unit_assert(get_error_state->hww_warp_esr_report_mask == sm_error_states_values[3], goto done);
+					}
+				}
+			}
+		}
+	}
+
+	ret = UNIT_SUCCESS;
+done:
+	if (ret == UNIT_FAIL) {
+		unit_err(m, "%s failed\n", __func__);
+	}
+
+	if (ch != NULL) {
+		nvgpu_tsg_force_unbind_channel(tsg, ch);
+		nvgpu_channel_close(ch);
+	}
+	if (tsg != NULL) {
+		nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
+	}
+	g->ops = gops;
+	return ret;
+}
+
 #define F_UNBIND_CHANNEL_CHECK_CTX_RELOAD_SET		BIT(0)
 #define F_UNBIND_CHANNEL_CHECK_CTX_RELOAD_CHID_MATCH	BIT(1)
 #define F_UNBIND_CHANNEL_CHECK_CTX_RELOAD_LAST		BIT(2)
@@ -1650,6 +1781,7 @@ struct unit_module_test nvgpu_tsg_tests[] = {
 	UNIT_TEST(unbind_channel, test_tsg_unbind_channel, &unit_ctx, 0),
 	UNIT_TEST(unbind_channel_check_hw_state,
 		test_tsg_unbind_channel_check_hw_state, &unit_ctx, 0),
+	UNIT_TEST(sm_error_states, test_tsg_sm_error_state_set_get, &unit_ctx, 0),
 	UNIT_TEST(unbind_channel_check_ctx_reload,
 		test_tsg_unbind_channel_check_ctx_reload, &unit_ctx, 0),
 	UNIT_TEST(enable_disable, test_tsg_enable, &unit_ctx, 0),
@@ -220,6 +220,40 @@ int test_tsg_release(struct unit_module *m,
 int test_tsg_unbind_channel_check_hw_state(struct unit_module *m,
 		struct gk20a *g, void *args);
 
+/**
+ * Test specification for: test_tsg_sm_error_state_set_get
+ *
+ * Description: BVEC tests for storing and retrieving SM error states.
+ *
+ * Test Type: Feature, Boundary Value
+ *
+ * Targets: nvgpu_tsg_store_sm_error_state, nvgpu_tsg_get_sm_error_state
+ *
+ * Input: test_fifo_init_support() run for this GPU
+ * Equivalence classes:
+ * sm_id
+ * - Invalid : [g->ops.gr.init.get_no_of_sm(g), U32_MAX]
+ * - Valid : [0, g->ops.gr.init.get_no_of_sm(g) - 1]
+ * struct nvgpu_tsg_sm_error_state fields
+ * - Valid : [0, U32_MAX]
+ *
+ * Steps:
+ * 1) tsg->sm_error_states = NULL (Invalid Case)
+ *    Verify nvgpu_tsg_store_sm_error_state returns an error.
+ *    Verify nvgpu_tsg_get_sm_error_state returns NULL.
+ * 2) sm_id >= g->ops.gr.init.get_no_of_sm(g) (Invalid Case)
+ *    Verify nvgpu_tsg_store_sm_error_state returns an error.
+ *    Verify nvgpu_tsg_get_sm_error_state returns NULL.
+ * 3) For valid sm_id and tsg->sm_error_states != NULL,
+ *    test each field of struct nvgpu_tsg_sm_error_state with
+ *    Min, Max and one random value in [0, U32_MAX].
+ *    a) Verify nvgpu_tsg_store_sm_error_state returns 0.
+ *    b) Verify nvgpu_tsg_get_sm_error_state returns non-NULL and the
+ *       stored values are read back.
+ *
+ * Output: Returns PASS if all branches gave expected results. FAIL otherwise.
+ */
+int test_tsg_sm_error_state_set_get(struct unit_module *m,
+		struct gk20a *g, void *args);
+
 /**
  * Test specification for: test_tsg_unbind_channel_check_ctx_reload
  *
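The Min/Max/Mid sampling the specification describes reduces to the two endpoints of each equivalence class plus one interior point. A sketch of that selection, assuming a get_random_u32(lo, hi) helper that returns a value in [lo, hi] as the unit test uses (bvec_pick itself is hypothetical):

	/* Hypothetical helper: pick a boundary value from the class [lo, hi]. */
	static u32 bvec_pick(u32 lo, u32 hi, u32 which)
	{
		switch (which) {
		case 0:
			return lo;	/* Min */
		case 1:
			return hi;	/* Max */
		default:
			/* Mid: a random interior point, when one exists */
			return (hi - lo > 1U) ?
				get_random_u32(lo + 1U, hi - 1U) : lo;
		}
	}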
@@ -199,12 +199,13 @@ static int gr_test_intr_allocate_ch(struct unit_module *m,
 
 
 static int gr_test_intr_block_ptr_as_current_ctx(struct unit_module *m,
-		struct gk20a *g, struct nvgpu_channel *ch,
+		struct gk20a *g, struct nvgpu_channel *ch, struct nvgpu_tsg *tsg,
 		u32 pid)
 {
 	int err, i;
 	struct nvgpu_gr_intr *intr = g->gr->intr;
 	u32 tsgid = nvgpu_inst_block_ptr(g, &ch->inst_block);
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
 
 	err = EXPECT_BUG(g->ops.gr.intr.stall_isr(g));
 	if (err != 0) {
@@ -219,6 +220,17 @@ static int gr_test_intr_block_ptr_as_current_ctx(struct unit_module *m,
 		unit_return_fail(m, "failed stall isr\n");
 	}
 
+	/* Cover the case where gv11b_gr_intr_read_sm_error_state fails */
+	sm_error_states = tsg->sm_error_states;
+	tsg->sm_error_states = NULL;
+
+	err = g->ops.gr.intr.stall_isr(g);
+	if (err != 0) {
+		unit_return_fail(m, "failed stall isr\n");
+	}
+
+	tsg->sm_error_states = sm_error_states;
+
 	/* Make all entries valid so the code will flush one */
 	for (i = 0; i < GR_TEST_CHANNEL_MAP_TLB_SIZE; i++) {
 		intr->chid_tlb[i].curr_ctx = pid;
@@ -289,7 +301,7 @@ static int gr_test_intr_allocate_ch_tsg(struct unit_module *m,
 		goto ch_cleanup;
 	}
 
-	err = gr_test_intr_block_ptr_as_current_ctx(m, g, ch, tsgid);
+	err = gr_test_intr_block_ptr_as_current_ctx(m, g, ch, tsg, tsgid);
 	if (err != 0) {
 		unit_err(m, "isr failed with block_ptr as current_ctx\n");
 		goto tsg_unbind;