Files
linux-nvgpu/userspace/units/fifo/tsg/nvgpu-tsg.h
Sagar Kamble 80efe558b1 gpu: nvgpu: add BVEC test for nvgpu_rc_pbdma_fault
Update nvgpu_rc_pbdma_fault with invalid checks and add BVEC test
for it.

Make ga10b_fifo_pbdma_isr static.

NVGPU-6772

Change-Id: I5485760c53e1fff1278557a5b25659a1fc0e4eaf
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2551617
(cherry picked from commit e917042d395d07cb902580bad3d5a7d0096cc303)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2623625
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2022-07-14 08:58:31 -07:00

517 lines
18 KiB
C

/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef UNIT_NVGPU_TSG_H
#define UNIT_NVGPU_TSG_H
#include <nvgpu/types.h>
struct unit_module;
struct gk20a;
/** @addtogroup SWUTS-fifo-tsg
* @{
*
* Software Unit Test Specification for fifo/tsg
*/
/**
* Test specification for: test_tsg_open
*
* Description: Create TSG
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_open, nvgpu_tsg_open_common,
* nvgpu_tsg_alloc_sm_error_states_mem,
* nvgpu_tsg_default_timeslice_us,
* nvgpu_tsg_get_from_id,
* nvgpu_tsg_check_and_get_from_id
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check that nvgpu_tsg_default_timeslice_us returns
* NVGPU_TSG_TIMESLICE_DEFAULT_US.
* - Check that nvgpu_tsg_check_and_get_from_id returns NULL for
* NVGPU_INVALID_TSG_ID.
* - Check that TSG can be allocated with nvgpu_tsg_open.
* - Check that nvgpu_tsg_open returns a non NULL value.
* - Check that tsg can be retrieved from tsgid with nvgpu_tsg_get_from_id.
* - Check that nvgpu_tsg_check_and_get_from_id returns tsg from its id.
* - Decrement ref_count in order to invoke nvgpu_tsg_release.
* - Check TSG allocation failures cases:
* - failure to acquire unused TSG (by forcing f->num_channels to 0).
* - failure to allocate sm error state:
* - invalid number of SMs (by stubbing g->ops.gr.init.get_no_of_sm).
* - TSG context in use (by setting next tsg->sm_error_states to
* non NULL value).
* - failure to allocate memory (by enabling fault injection for
* kzalloc).
* In negative testing case, original state is restored after checking
* that nvgpu_tsg_open failed.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_open(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_bind_channel
*
* Description: Bind channel to TSG.
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_bind_channel, nvgpu_tsg_from_ch
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check that channel can be bound to TSG:
* - Allocate TSG with nvgpu_tsg_open.
* - Allocate channel with nvgpu_channel_open_new.
* - Check that nvgpu_tsg_bind_channel returns 0.
* - Check that TSG's list of channels is not empty.
* - Unbind channel with nvgpu_tsg_unbind_channel.
* - Check that ch->tsgid is now invalid.
* - Check that tsg can be retrieved from ch using nvgpu_tsg_from_ch.
* - Check TSG bind failure cases:
* - Attempt to bind an already bound channel (by binding a channel to a
* TSG, then attempting to bind it to another TSG).
* - Attempt to bind channel and TSGs with runlist_id mismatch (by forcing
* TSG's runlist_id to a different value).
* - Attempt to bind a channel that is already active (by forcing related
* bit in the runlist->active_channels bitmask).
* - Check that nvgpu_tsg_from_ch returns NULL when bind failed.
* In negative testing case, original state is restored after checking
* that nvgpu_tsg_bind_channel failed.
* - Additionally, the following cases are checked:
* - Case where g->ops.tsg.bind_channel is NULL.
* - Case where g->ops.tsg.bind_channel_eng_method_buffers is NULL.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_bind_channel(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_unbind_channel
*
* Description: Unbind channel from TSG.
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_unbind_channel
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check that channel can be unbound from TSG:
* - Allocate TSG and channel.
* - Bind channel to TSG.
* - Unbind channel from TSG.
* - Check that channel has been removed from TSG's list.
* - Check that channel's tsgid is invalid.
* - Check that other channels in TSG are still bound.
* - Check TSG unbind failure cases:
* - Attempt to unbind an unserviceable channel (by forcing unserviceable).
* - Failure to preempt TSG (by stubbing g->ops.fifo.preempt_tsg).
* - Channel with invalid HW state (by stubbing
* g->ops.tsg.unbind_channel_check_hw_state).
* - Failure to update runlist (by stubbing
* g->ops.runlist.update_for_channel).
* - Failure to update runlist during TSG abort (by stubbing
* g->ops.runlist.update_for_channel and using a counter to fail only
* during abort).
* - Attempt to bind an already bound channel (by binding a channel to a
* TSG, then attempting to bind it to another TSG).
* - Attempt to bind channel and TSGs with runlist_id mismatch (by forcing
* TSG's runlist_id to a different value).
* - Attempt to bind a channel that is already active (by forcing related
* bit in the runlist->active_channels bitmask).
* In negative testing case, original state is restored after checking
* that nvgpu_tsg_unbind_channel failed.
* - Additionally, the following cases are checked:
* - Case where g->ops.tsg.unbind_channel is NULL.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_unbind_channel(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_release
*
* Description: Release TSG.
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_release, nvgpu_tsg_release_common
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check that TSG can be released:
* - Allocate TSG.
* - Decrement ref count and check that TSG is released.
* - Check that in_use is false.
* - Check de-allocation of other resources:
* - Case where g->ops.gr.setup.free_gr_ctx is called.
* It requires dummy vm, gr_ctx and gr_ctx->mem to be allocated.
* A stub is used to check that the HAL was actually invoked.
* - Other combinations of vm, gr_ctx and gr_ctx->mem allocations, to
* check that g->ops.gr.setup.free_gr_ctx is not called.
* - Unhook of event_ids (by adding 2 dummy events in event_id list, and
* checking that list is empty after TSG release).
* - Case where event_id list is empty before TSG release is tested as well.
* - Check that VM refcount is decremented (and VM deallocated in our
* case), when present.
* - Check that sm_error_states is deallocated.
* - Check any combination of VM, gr_ctx, gr_ctx->mem, and sm_error_state.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_release(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_unbind_channel_check_hw_state
*
* Description: Check HW state during TSG unbind channel.
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_unbind_channel_hw_state_check, gk20a_tsg_unbind_channel_check_hw_next
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check valid cases for nvgpu_tsg_unbind_channel_hw_state_check:
* - hw_state.next is not set (as per g->ops.channel.read_state).
* - Check that g->ops.tsg.unbind_channel_check_ctx_reload is called
* when defined (using a stub).
* - Check that g->ops.tsg.unbind_channel_check_eng_faulted is called
* when defined (using a stub).
* - Check failure cases in nvgpu_tsg_unbind_channel_hw_state_check:
* - Case where hw_state.next is set (by stubbing g->ops.channel.read_state).
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_unbind_channel_check_hw_state(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_sm_error_state_set_get
*
* Description: Store and retrieve SM error state for a TSG.
*
* Test Type: Feature, Boundary Value
*
* Targets: nvgpu_tsg_store_sm_error_state, nvgpu_tsg_get_sm_error_state
*
* Input: test_fifo_init_support() run for this GPU
* Equivalence classes:
* sm_id
* - Invalid : [g->ops.gr.init.get_no_of_sm(g), U32_MAX]
* - Valid : [0, g->ops.gr.init.get_no_of_sm(g) - 1]
* struct nvgpu_tsg_sm_error_state fields
* - Valid : [0, U32_MAX]
*
* Steps:
* 1) tsg->sm_error_states = NULL (Invalid Case)
* Verify nvgpu_tsg_store_sm_error_state returns error
* Verify nvgpu_tsg_get_sm_error_state returns NULL
* 2) sm_id >= g->ops.gr.init.get_no_of_sm(g) (Invalid Case)
* Verify nvgpu_tsg_store_sm_error_state returns error
* Verify nvgpu_tsg_get_sm_error_state returns NULL
* 3) For Valid sm_id and tsg->sm_error_states != NULL
* For each value within struct nvgpu_tsg_sm_error_state,
* test with Min, Max and one random number between [0, U32_MAX].
* a) Verify nvgpu_tsg_store_sm_error_state returns 0
* b) Verify nvgpu_tsg_get_sm_error_state returns non NULL.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_sm_error_state_set_get(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_unbind_channel_check_ctx_reload
*
* Description: Check if channel reload is needed during TSG unbind
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_unbind_channel_ctx_reload_check
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check valid cases for nvgpu_tsg_unbind_channel_ctx_reload_check:
* - hw_state.ctx_reload is not set (nothing to do).
* - hw_state.ctx_reload is set:
* - Check that when another channel is bound to TSG,
* g->ops.channel.force_ctx_reload is called for this channel. This is done
* by allocating another channel, binding it to the same TSG, stubbing
* g->ops.channel.force_ctx_reload, and checking that the stub was called
* for this channel.
* - Check that g->ops.channel.force_ctx_reload is not called when there is
* no other channel in the TSG.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_unbind_channel_check_ctx_reload(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_enable
*
* Description: Enable/disable TSG
*
* Test Type: Feature
*
* Targets: gops_tsg.enable, gops_tsg.disable,
* nvgpu_tsg_disable
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check valid cases for g->ops.tsg.enable:
* - Enable TSG with a bound channel.
* - Check that g->ops.channel.enable is called (using stub).
* - Check that g->ops.usermode.ring_doorbell is called (using stub).
* - Enable TSG without bound channel.
* - Check valid cases for g->ops.tsg.disable:
* - Disable TSG with a bound channel.
* - Check that g->ops.channel.disable is called (using stub).
* - Disable TSG without bound channel.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_enable(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_check_and_get_from_id
*
* Description: Get TSG context from id
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_check_and_get_from_id
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check that nvgpu_tsg_check_and_get_from_id returns NULL for
* an invalid tsgid (NVGPU_INVALID_TSG_ID).
* - Check that nvgpu_tsg_check_and_get_from_id returns correct
* tsg pointer for an existing TSG.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_check_and_get_from_id(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_check_and_get_from_id_bvec
*
* Description: This test will validate boundary values for
* the function nvgpu_tsg_check_and_get_from_id
*
* Test Type: Boundary Value
*
* Targets: nvgpu_tsg_check_and_get_from_id
*
* Input: test_fifo_init_support() run for this GPU
* Equivalence classes:
* tsgid
* - Invalid : [g->fifo.num_channels, U32_MAX]
* - Valid : [0, g->fifo.num_channels - 1]
*
* Steps:
* - Check that nvgpu_tsg_check_and_get_from_id returns NULL for
* any invalid tsgid.
* - Check that nvgpu_tsg_check_and_get_from_id returns correct
* tsg pointer for any valid tsgid.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_check_and_get_from_id_bvec(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_abort
*
* Description: Abort TSG
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_abort
*
* Input: test_fifo_init_support() run for this GPU
*
* Steps:
* - Check valid cases for nvgpu_tsg_abort:
* - Abort TSG with bound channel.
* - Check that g->ops.channel.abort_clean_up is called for channel
* (by using stub).
* - Abort TSG without bound channel.
* - Check with and without preempt set.
* - Check that g->ops.fifo.preempt_tsg is called when preempt is
* requested (by using stub).
* - Check invalid cases for nvgpu_tsg_abort:
* - Attempt to abort a non-abortable TSG (by forcing tsg->abortable=false).
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_abort(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_setup_sw
*
* Description: SW Initialization for TSGs
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_setup_sw, nvgpu_tsg_cleanup_sw
*
* Input: None
*
* Steps:
* - Check valid case for nvgpu_tsg_setup_sw.
* - Check valid case for nvgpu_tsg_cleanup_sw.
* - Check invalid case for nvgpu_tsg_setup_sw:
* - Failure to allocate TSG context (by using fault injection for vzalloc).
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_setup_sw(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_mark_error
*
* Description: Mark all channels unserviceable in a TSG
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_mark_error, nvgpu_tsg_set_error_notifier
*
* Input: None
*
* Steps:
* - Check marginal cases:
* - Mark error for TSG with no bound channel.
* - Mark error for TSG with one unserviceable channel.
* - Check likely cases:
* - Use one TSG with one bound channel.
* - Set error notifier to NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT.
* - Check that nvgpu_tsg_mark_error returns true (i.e. verbose), when
* ch->ctxsw_timeout_debug_dump is true.
* - Check that nvgpu_tsg_mark_error returns false otherwise.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_mark_error(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_nvgpu_tsg_set_error_notifier_bvec
*
* Description: This test will verify the boundary values for the function
* nvgpu_tsg_set_error_notifier
*
* Test Type: Boundary Value
*
* Targets: nvgpu_tsg_set_error_notifier
*
* Input: None
* Equivalence classes:
* error_notifier
* - Invalid : { NVGPU_ERR_NOTIFIER_INVAL, U32_MAX }
* - Valid : { NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT, NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH }
*
* Steps:
* Check likely cases:
* - Use one TSG with one bound channel for minimum, median and maximum values
* from valid classes.
* - Use one TSG with one bound channel for minimum, maximum and one other random value
* from invalid classes.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_nvgpu_tsg_set_error_notifier_bvec(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_set_ctx_mmu_error
*
* Description: Set MMU fault error notifier for TSG
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_set_ctx_mmu_error
*
* Input: None
*
* Steps:
* - Setup a TSG with one bound channel.
* - Initialize error notifier for channel.
* - Call nvgpu_tsg_set_ctx_mmu_error for TSG.
* - Check that channel's error notifier has been set to
* NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_set_ctx_mmu_error(struct unit_module *m,
struct gk20a *g, void *args);
/**
* Test specification for: test_tsg_reset_faulted_eng_pbdma
*
* Description: Reset faulted engine and/or PBDMAs for a TSG
*
* Test Type: Feature
*
* Targets: nvgpu_tsg_reset_faulted_eng_pbdma, nvgpu_channel_from_ch_entry
*
* Input: None
*
* Steps:
* - Check valid case:
* - Setup a TSG with one bound channel.
* - Call nvgpu_tsg_reset_faulted_eng_pbdma.
* - Check that g->ops.channel.reset_faulted was called for channel.
* - Check invalid cases:
* - Case where TSG pointer is NULL.
* - Case where g->ops.channel.reset_faulted is NULL.
*
* Output: Returns PASS if all branches gave expected results. FAIL otherwise.
*/
int test_tsg_reset_faulted_eng_pbdma(struct unit_module *m,
struct gk20a *g, void *args);
/**
* @}
*/
#endif /* UNIT_NVGPU_TSG_H */