Files
linux-nvgpu/userspace/units/rc/nvgpu-rc.c
Alex Waterman 59eb714c48 unit: Disable some unit tests for device work
Fix what unit tests can be easily fixed, but disable some others. It's
not clear why the MM related tests started failing - there's really zero
reason for this. The list of disabled tests is primarily engine related
but there are some others that get influenced by the device and engine
structure.

  test_poweroff.init_poweroff=2
  test_is_stall_and_eng_intr_pending.intr_is_stall_and_eng_intr_pending=2
  test_isr_nonstall.isr_nonstall=2
  test_isr_stall.isr_stall=2
  test_engine_enum_from_type.enum_from_type=2
  test_engine_find_busy_doing_ctxsw.find_busy_doing_ctxsw=2
  test_engine_get_active_eng_info.get_active_eng_info=2
  test_engine_get_fast_ce_runlist_id.get_fast_ce_runlist_id=2
  test_engine_get_gr_runlist_id.get_gr_runlist_id=2
  test_engine_get_mask_on_id.get_mask_on_id=2
  test_engine_get_runlist_busy_engines.get_runlist_busy_engines=2
  test_engine_ids.ids=2
  test_engine_init_info.init_info=2
  test_engine_interrupt_mask.interrupt_mask=2
  test_engine_is_valid_runlist_id.is_valid_runlist_id=2
  test_engine_mmu_fault_id.mmu_fault_id=2
  test_engine_mmu_fault_id_veid.mmu_fault_id_veid=2
  test_engine_setup_sw.setup_sw=2
  test_engine_status.status=2
  test_fifo_init_support.init_support=2
  test_fifo_remove_support.remove_support=2
  test_gp10b_engine_init_ce_info.engine_init_ce_info=2
  test_nvgpu_mem_iommu_translate.mem_iommu_translate=2
  test_nvgpu_mem_phys_ops.nvgpu_mem_phys_ops=2

And delete unit tests for functions that no longer exist:

  test_device_info_parse_enum.top_device_info_parse_enum
  test_get_device_info.top_get_device_info
  test_get_num_engine_type_entries.top_get_num_engine_type_entries
  test_is_engine_ce.top_is_engine_ce
  test_is_engine_gr.top_is_engine_gr

JIRA NVGPU-5421

Change-Id: I343c0b1ea44c472b22356c896672153fc889ffc0
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2355300
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2020-12-15 14:13:28 -06:00

450 lines
13 KiB
C

/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h>
#include <unistd.h>
#include <unit/unit.h>
#include <unit/io.h>
#include <nvgpu/types.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/hal_init.h>
#include <nvgpu/dma.h>
#include <nvgpu/posix/io.h>
#include <os/posix/os_posix.h>
#include <nvgpu/posix/posix-fault-injection.h>
#include <nvgpu/posix/posix-nvhost.h>
#include <nvgpu/posix/posix-channel.h>
#include <nvgpu/runlist.h>
#include <nvgpu/device.h>
#include <nvgpu/channel.h>
#include <nvgpu/rc.h>
#include <nvgpu/pbdma_status.h>
#include <nvgpu/error_notifier.h>
#include "../fifo/nvgpu-fifo-common.h"
#include "../fifo/nvgpu-fifo-gv11b.h"
#include "nvgpu-rc.h"
/* GV11B PMC boot-0 architecture/implementation values used to select the HAL. */
#define NV_PMC_BOOT_0_ARCHITECTURE_GV110 (0x00000015 << \
NVGPU_GPU_ARCHITECTURE_SHIFT)
#define NV_PMC_BOOT_0_IMPLEMENTATION_B 0xB
/* Convenience assert that jumps to the local "done" label on failure. */
#define assert(cond) unit_assert(cond, goto done)
/*
 * Stub for g->ops.gr.init.get_no_of_sm: report a fixed SM count (8) so
 * gr init can proceed without real hardware state.
 */
static u32 stub_gv11b_gr_init_get_no_of_sm(struct gk20a *g)
{
return 8;
}
/* Channel and TSG shared by all tests; created in test_rc_init, torn down in test_rc_deinit. */
static struct nvgpu_channel *ch = NULL;
static struct nvgpu_tsg *tsg = NULL;
/*
 * Check that the channel's posix error notifier carries the expected
 * error code and a status of 0xffff.
 *
 * Returns UNIT_SUCCESS when both match, UNIT_FAIL otherwise (including
 * when no posix channel state is attached).
 */
static int verify_error_notifier(struct nvgpu_channel *ch, u32 error_notifier)
{
	struct nvgpu_posix_channel *cp = ch->os_priv;
	bool matched;

	if (cp == NULL) {
		return UNIT_FAIL;
	}

	matched = (cp->err_notifier.error == error_notifier) &&
		  (cp->err_notifier.status == 0xffff);

	return matched ? UNIT_SUCCESS : UNIT_FAIL;
}
/*
 * Reset the channel's posix error notifier fields to zero so a test can
 * later verify that a fresh notification was posted. No-op when no posix
 * channel state is attached.
 */
static void clear_error_notifier(struct nvgpu_channel *ch)
{
	struct nvgpu_posix_channel *cp = ch->os_priv;

	if (cp == NULL) {
		return;
	}

	cp->err_notifier.error = 0U;
	cp->err_notifier.status = 0U;
}
/*
 * Set up the environment shared by the RC tests: GV11B register space,
 * ecc/mm/fifo/runlist support, plus the file-scope tsg and ch (with an
 * attached posix channel) bound together.
 *
 * Returns UNIT_SUCCESS on success, UNIT_FAIL (or a unit framework error)
 * on failure, unwinding any partially-created state.
 */
int test_rc_init(struct unit_module *m, struct gk20a *g, void *args)
{
	int ret = 0;
	struct nvgpu_posix_channel *posix_channel = NULL;

	ret = test_fifo_setup_gv11b_reg_space(m, g);
	if (ret != 0) {
		unit_return_fail(m, "fifo reg_space failure");
	}

	nvgpu_device_init(g);
	g->ops.gr.init.get_no_of_sm = stub_gv11b_gr_init_get_no_of_sm;
	g->ops.ecc.ecc_init_support(g);
	g->ops.mm.init_mm_support(g);

	ret = nvgpu_fifo_init_support(g);
	nvgpu_assert(ret == 0);

	/* Do not allocate from vidmem */
	nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY, true);

	ret = nvgpu_runlist_setup_sw(g);
	nvgpu_assert(ret == 0);

	tsg = nvgpu_tsg_open(g, getpid());
	nvgpu_assert(tsg != NULL);

	ch = nvgpu_channel_open_new(g, NVGPU_INVALID_RUNLIST_ID, false,
			getpid(), getpid());
	if (ch == NULL) {
		ret = UNIT_FAIL;
		unit_err(m, "failed channel open");
		goto clear_tsg;
	}

	posix_channel = malloc(sizeof(struct nvgpu_posix_channel));
	if (posix_channel == NULL) {
		/*
		 * Bug fix: ret was still 0 (success) here from the earlier
		 * setup calls, so an allocation failure was silently
		 * reported as a passing test.
		 */
		ret = UNIT_FAIL;
		unit_err(m, "failed to allocate memory for posix channel");
		goto clear_channel;
	}
	ch->os_priv = posix_channel;

	ret = nvgpu_tsg_bind_channel(tsg, ch);
	if (ret) {
		ret = UNIT_FAIL;
		unit_err(m, "failed to bind channel");
		goto clear_posix_channel;
	}

	return UNIT_SUCCESS;

clear_posix_channel:
	free(posix_channel);
clear_channel:
	nvgpu_channel_close(ch);
	ch = NULL;
clear_tsg:
	nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
	tsg = NULL;
	return ret;
}
int test_rc_deinit(struct unit_module *m, struct gk20a *g, void *args)
{
struct nvgpu_posix_channel *posix_channel = ch->os_priv;
int ret = nvgpu_tsg_unbind_channel(tsg, ch);
if (ret != 0) {
ret = UNIT_FAIL;
unit_err(m , "channel already unbound");
}
if (ch != NULL && posix_channel != NULL) {
free(posix_channel);
}
if (ch != NULL) {
nvgpu_channel_close(ch);
}
if (tsg != NULL) {
nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release);
}
if (g->fifo.remove_support) {
g->fifo.remove_support(&g->fifo);
}
return ret;
}
/*
 * Exercise nvgpu_rc_fifo_recover() with quiesce pending and a cleared
 * error notifier. Only checks that the call completes.
 */
int test_rc_fifo_recover(struct unit_module *m, struct gk20a *g, void *args)
{
	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	nvgpu_rc_fifo_recover(g, 0U, 0U, false, false, false, 0U);

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/*
 * Exercise nvgpu_rc_ctxsw_timeout() with quiesce pending and verify the
 * channel received the FIFO idle-timeout error notifier.
 */
int test_rc_ctxsw_timeout(struct unit_module *m, struct gk20a *g, void *args)
{
	int status;

	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	nvgpu_rc_ctxsw_timeout(g, 0U, tsg, false);

	g->sw_quiesce_pending = false;
	status = verify_error_notifier(ch,
			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
	return status;
}
/*
 * Exercise nvgpu_rc_runlist_update() for runlist 0 with quiesce pending.
 * Only checks that the call completes.
 */
int test_rc_runlist_update(struct unit_module *m, struct gk20a *g, void *args)
{
	g->sw_quiesce_pending = true;

	nvgpu_rc_runlist_update(g, 0U);

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/*
 * Exercise nvgpu_rc_preempt_timeout() with quiesce pending and verify
 * the channel received the FIFO idle-timeout error notifier.
 */
int test_rc_preempt_timeout(struct unit_module *m, struct gk20a *g, void *args)
{
	int status;

	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	nvgpu_rc_preempt_timeout(g, tsg);

	g->sw_quiesce_pending = false;
	status = verify_error_notifier(ch,
			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
	return status;
}
/*
 * Exercise nvgpu_rc_gr_fault() against the shared tsg/ch pair with
 * quiesce pending. Only checks that the call completes.
 */
int test_rc_gr_fault(struct unit_module *m, struct gk20a *g, void *args)
{
	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	nvgpu_rc_gr_fault(g, tsg, ch);

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/*
 * Exercise nvgpu_rc_sched_error_bad_tsg() with quiesce pending.
 * Only checks that the call completes.
 */
int test_rc_sched_error_bad_tsg(struct unit_module *m, struct gk20a *g, void *args)
{
	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	nvgpu_rc_sched_error_bad_tsg(g);

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/*
 * Exercise nvgpu_rc_tsg_and_related_engines() for a sched-error RC type
 * with quiesce pending. Only checks that the call completes.
 */
int test_rc_tsg_and_related_engines(struct unit_module *m, struct gk20a *g, void *args)
{
	g->sw_quiesce_pending = true;

	nvgpu_rc_tsg_and_related_engines(g, tsg, false, RC_TYPE_SCHED_ERR);

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/* Branch indices for test_rc_mmu_fault. */
#define F_RC_MMU_FAULT_ID_INVALID	0
#define F_RC_MMU_FAULT_ID_TYPE_TSG	1
#define F_RC_MMU_FAULT_ID_TYPE_NOT_TSG	2

/* Human-readable names for the branches above, logged per iteration. */
static const char *f_rc_mmu_fault[] = {
	"id_invalid",
	"id_type_tsg",
	"id_type_not_tsg",
};

/*
 * Exercise nvgpu_rc_mmu_fault() across three id/id_type combinations:
 * an invalid id, the shared TSG's id with ID_TYPE_TSG, and the shared
 * TSG's id with ID_TYPE_UNKNOWN. Only checks that the calls complete.
 */
int test_rc_mmu_fault(struct unit_module *m, struct gk20a *g, void *args)
{
	u32 branch;

	g->sw_quiesce_pending = true;
	clear_error_notifier(ch);

	for (branch = 0U; branch <= F_RC_MMU_FAULT_ID_TYPE_NOT_TSG; branch++) {
		u32 id = NVGPU_INVALID_TSG_ID;
		u32 id_type = F_RC_MMU_FAULT_ID_TYPE_NOT_TSG;

		switch (branch) {
		case F_RC_MMU_FAULT_ID_TYPE_TSG:
			id = tsg->tsgid;
			id_type = ID_TYPE_TSG;
			break;
		case F_RC_MMU_FAULT_ID_TYPE_NOT_TSG:
			id = tsg->tsgid;
			id_type = ID_TYPE_UNKNOWN;
			break;
		default:
			/* F_RC_MMU_FAULT_ID_INVALID: keep invalid id. */
			break;
		}

		unit_info(m, "%s branch: %s\n", __func__,
				f_rc_mmu_fault[branch]);
		nvgpu_rc_mmu_fault(g, 0U, id, id_type, RC_TYPE_MMU_FAULT, NULL);
	}

	g->sw_quiesce_pending = false;
	return UNIT_SUCCESS;
}
/* Channel-switch status branch indices for test_rc_pbdma_fault. */
#define F_RC_IS_CHSW_VALID_OR_SAVE 0U
#define F_RC_IS_CHSW_LOAD_OR_SWITCH 1U
#define F_RC_IS_CHSW_INVALID 2U
/* id_type branch indices. */
#define F_RC_ID_TYPE_TSG 0U
#define F_RC_ID_TYPE_CH 1U
#define F_RC_ID_TYPE_INVALID 2U
/* Sub-branches of the channel id_type case. */
#define F_RC_ID_TYPE_CH_NULL_CHANNEL 0U
#define F_RC_ID_TYPE_CH_NULL_TSG 1U
#define F_RC_ID_TYPE_CH_FULL 2U
/* Log names matching the branch indices above, in order. */
static const char *f_rc_chsw[] = {
"is_chsw_valid_or_save",
"is_chsw_load_or_switch",
"is_chsw_invalid",
};
static const char *f_rc_id_type[] = {
"id_type_tsg",
"id_type_ch",
"id_type_invalid",
};
static const char *f_rc_id_ch_subbranch[] = {
"null_channel",
"null_tsg",
"full",
};
/*
 * Populate a pbdma status info struct for one branch combination of
 * test_rc_pbdma_fault.
 *
 * For the VALID_OR_SAVE chsw branch the (id, id_type) pair carries the
 * target; for LOAD_OR_SWITCH it is (next_id, next_id_type); the unused
 * pair is set to the corresponding INVALID constants.
 *
 * ch_without_tsg: channel with no bound TSG, used for the "null_tsg"
 * sub-branch; the file-scope ch (bound to tsg) is used for "full".
 */
static void set_pbdma_info_id_type(u32 chsw_branches,
struct nvgpu_pbdma_status_info *info,
struct nvgpu_channel *ch_without_tsg,
u32 id_type_branches,
u32 id_type_ch_branches)
{
if (id_type_branches == F_RC_ID_TYPE_TSG) {
/* TSG branch: report the shared tsg's id in the relevant slot. */
info->id = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
tsg->tsgid : PBDMA_STATUS_ID_INVALID;
info->id_type = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
PBDMA_STATUS_ID_TYPE_TSGID : PBDMA_STATUS_ID_TYPE_INVALID;
info->next_id = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
tsg->tsgid : PBDMA_STATUS_ID_INVALID;
info->next_id_type = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
PBDMA_STATUS_NEXT_ID_TYPE_TSGID : PBDMA_STATUS_NEXT_ID_TYPE_INVALID;
} else if (id_type_branches == F_RC_ID_TYPE_CH) {
if (id_type_ch_branches == F_RC_ID_TYPE_CH_NULL_CHANNEL) {
/* Invalid channel id in both slots: lookup should find no channel. */
info->id = NVGPU_INVALID_CHANNEL_ID;
info->id_type = PBDMA_STATUS_ID_TYPE_CHID;
info->next_id = NVGPU_INVALID_CHANNEL_ID;
info->next_id_type = PBDMA_STATUS_NEXT_ID_TYPE_CHID;
} else if (id_type_ch_branches == F_RC_ID_TYPE_CH_NULL_TSG) {
/* Use ch_without_tsg for NULL TSG branch */
info->id = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
ch_without_tsg->chid : PBDMA_STATUS_ID_INVALID;
info->id_type = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
PBDMA_STATUS_ID_TYPE_CHID : PBDMA_STATUS_ID_TYPE_INVALID;
info->next_id = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
ch_without_tsg->chid : PBDMA_STATUS_ID_INVALID;
info->next_id_type = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
PBDMA_STATUS_NEXT_ID_TYPE_CHID : PBDMA_STATUS_NEXT_ID_TYPE_INVALID;
} else {
/* Use ch for full path */
info->id = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
ch->chid : PBDMA_STATUS_ID_INVALID;
info->id_type = (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) ?
PBDMA_STATUS_ID_TYPE_CHID : PBDMA_STATUS_ID_TYPE_INVALID;
info->next_id = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
ch->chid : PBDMA_STATUS_ID_INVALID;
info->next_id_type = (chsw_branches == F_RC_IS_CHSW_LOAD_OR_SWITCH) ?
PBDMA_STATUS_NEXT_ID_TYPE_CHID : PBDMA_STATUS_NEXT_ID_TYPE_INVALID;
}
} else {
/*
 * NOTE(review): these assign the id constant PBDMA_STATUS_ID_INVALID
 * to the *_id_type fields; other branches use
 * PBDMA_STATUS_ID_TYPE_INVALID / PBDMA_STATUS_NEXT_ID_TYPE_INVALID
 * for that purpose — confirm the constants are interchangeable here.
 */
info->id_type = PBDMA_STATUS_ID_INVALID;
info->next_id_type = PBDMA_STATUS_ID_INVALID;
}
}
/*
 * Exercise nvgpu_rc_pbdma_fault() across all chsw-status and id_type
 * branch combinations, including a channel with no bound TSG.
 * Only checks that the calls complete.
 *
 * Returns UNIT_FAIL if the extra channel cannot be opened, otherwise
 * UNIT_SUCCESS.
 */
int test_rc_pbdma_fault(struct unit_module *m, struct gk20a *g, void *args)
{
	u32 chsw_branches, id_type_branches;
	u32 chsw_subbranch;
	struct nvgpu_channel *ch_without_tsg = NULL;

	ch_without_tsg = nvgpu_channel_open_new(g, NVGPU_INVALID_RUNLIST_ID, false,
			getpid(), getpid());
	if (ch_without_tsg == NULL) {
		unit_err(m, "failed channel open");
		return UNIT_FAIL;
	}

	g->sw_quiesce_pending = true;
	for (chsw_branches = F_RC_IS_CHSW_VALID_OR_SAVE;
			chsw_branches <= F_RC_IS_CHSW_INVALID; chsw_branches++) {
		struct nvgpu_pbdma_status_info info = {0};

		if (chsw_branches == F_RC_IS_CHSW_INVALID) {
			info.chsw_status = NVGPU_PBDMA_CHSW_STATUS_INVALID;
			unit_info(m, "%s branch: %s\n", __func__, f_rc_chsw[chsw_branches]);
			nvgpu_rc_pbdma_fault(g, 0U, NVGPU_ERR_NOTIFIER_PBDMA_ERROR, &info);
			continue;
		}

		/*
		 * Bug fix: the original computed both chsw_status values in
		 * a loop *before* issuing any fault calls, so the value from
		 * subbranch 0 (SAVE / SWITCH) was immediately overwritten and
		 * never exercised. Nest the id_type loop inside the subbranch
		 * loop so every chsw_status variant is actually tested.
		 */
		for (chsw_subbranch = 0U; chsw_subbranch < 2U; chsw_subbranch++) {
			if (chsw_branches == F_RC_IS_CHSW_VALID_OR_SAVE) {
				info.chsw_status = (chsw_subbranch != 0U) ?
					NVGPU_PBDMA_CHSW_STATUS_VALID :
					NVGPU_PBDMA_CHSW_STATUS_SAVE;
			} else {
				info.chsw_status = (chsw_subbranch != 0U) ?
					NVGPU_PBDMA_CHSW_STATUS_LOAD :
					NVGPU_PBDMA_CHSW_STATUS_SWITCH;
			}

			for (id_type_branches = F_RC_ID_TYPE_TSG;
					id_type_branches <= F_RC_ID_TYPE_INVALID;
					id_type_branches++) {
				u32 id_type_ch_sub_branches = 0U;

				if (id_type_branches == F_RC_ID_TYPE_CH) {
					for (id_type_ch_sub_branches = F_RC_ID_TYPE_CH_NULL_CHANNEL;
							id_type_ch_sub_branches <= F_RC_ID_TYPE_CH_FULL;
							id_type_ch_sub_branches++) {
						set_pbdma_info_id_type(chsw_branches, &info, ch_without_tsg,
								id_type_branches, id_type_ch_sub_branches);
						unit_info(m, "%s branch: %s - %s - %s\n", __func__,
								f_rc_chsw[chsw_branches],
								f_rc_id_type[id_type_branches],
								f_rc_id_ch_subbranch[id_type_ch_sub_branches]);
						nvgpu_rc_pbdma_fault(g, 0U, NVGPU_ERR_NOTIFIER_PBDMA_ERROR, &info);
					}
				} else {
					set_pbdma_info_id_type(chsw_branches, &info, ch_without_tsg,
							id_type_branches, id_type_ch_sub_branches);
					unit_info(m, "%s branch: %s - %s\n", __func__,
							f_rc_chsw[chsw_branches],
							f_rc_id_type[id_type_branches]);
					nvgpu_rc_pbdma_fault(g, 0U, NVGPU_ERR_NOTIFIER_PBDMA_ERROR, &info);
				}
			}
		}
	}
	g->sw_quiesce_pending = false;
	nvgpu_channel_close(ch_without_tsg);
	return UNIT_SUCCESS;
}
/*
 * RC unit test table. rc_init must run first (creates the shared tsg/ch)
 * and rc_deinit last (tears them down); the tests in between rely on that
 * shared state.
 */
struct unit_module_test nvgpu_rc_tests[] = {
UNIT_TEST(rc_init, test_rc_init, NULL, 0),
UNIT_TEST(rc_fifo_recover, test_rc_fifo_recover, NULL, 0),
UNIT_TEST(rc_ctxsw_timeout, test_rc_ctxsw_timeout, NULL, 0),
UNIT_TEST(rc_runlist_update, test_rc_runlist_update, NULL, 0),
UNIT_TEST(rc_preempt_timeout, test_rc_preempt_timeout, NULL, 0),
UNIT_TEST(rc_gr_fault, test_rc_gr_fault, NULL, 0),
UNIT_TEST(rc_sched_error_bad_tsg, test_rc_sched_error_bad_tsg, NULL, 0),
UNIT_TEST(rc_tsg_and_related_engines, test_rc_tsg_and_related_engines, NULL, 0),
UNIT_TEST(rc_mmu_fault, test_rc_mmu_fault, NULL, 0),
UNIT_TEST(rc_pbdma_fault, test_rc_pbdma_fault, NULL, 0),
UNIT_TEST(rc_deinit, test_rc_deinit, NULL, 0),
};
UNIT_MODULE(nvgpu-rc, nvgpu_rc_tests, UNIT_PRIO_NVGPU_TEST);