Files
linux-nvgpu/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
mkumbar 87984ea344 gpu: nvgpu: support nvriscv debug feature
Enable the NVRISCV debug buffer feature in NVGPU.

The debug buffer feature prints debug logs from the ucode onto the
console in real time.

The feature uses DMEM, a queue, and the SWGEN1 interrupt to share
ucode debug data with NVGPU: the ucode writes a debug message to DMEM
and updates the offset in the queue, which triggers an interrupt to
NVGPU. NVGPU then copies the debug message from DMEM into a local
buffer, processes it, and prints it onto the console.
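On the NVGPU side this comes down to checking the SWGEN1 bit in the
falcon interrupt status and draining the buffer. A minimal sketch of
that path, mirroring the SWGEN1 handler added at the end of this file
(compiled only when CONFIG_NVGPU_FALCON_DEBUG is set):

    if ((intr & pwr_falcon_irqstat_swgen1_true_f()) != 0U) {
            /* copy the pending ucode log data out of DMEM and print it */
            err = nvgpu_falcon_dbg_buf_display(pmu->flcn);
            if (err != 0) {
                    nvgpu_err(g, "nvgpu_falcon_dbg_buf_display failed err=%d",
                            err);
            }
    }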

The debug buffer support is added under the falcon unit; any engine
that needs it can use the feature by passing the required parameters
through the falcon unit's public functions (see the sketch below).
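As an illustration only: the engine side hands the falcon unit the
debug buffer size and the registers the ucode uses to publish its
read/write offsets. The helper name nvgpu_falcon_dbg_buf_init() and
the parameters below are assumptions made for this sketch, not taken
verbatim from this change:

    /*
     * Hypothetical registration call: buffer size plus the queue
     * head/tail registers that carry the ucode's debug buffer offsets.
     */
    err = nvgpu_falcon_dbg_buf_init(pmu->flcn, DEBUG_BUF_SIZE_BYTES,
                    dbg_buf_wr_offset_reg, dbg_buf_rd_offset_reg);
    if (err != 0) {
            nvgpu_err(g, "falcon debug buffer init failed err=%d", err);
    }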

The GA10B NVRISCV NS/LS PMU ucode already supports this feature; this
change adds the required support on the NVGPU side. With the feature
enabled, ucode prints can now be seen in real time.

JIRA NVGPU-6959

Change-Id: I9d46020470285b490b6bc876204f62698055b1ec
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2548951
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2021-07-17 12:45:00 -07:00


/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/pmu.h>
#include <nvgpu/falcon.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/firmware.h>
#include <nvgpu/mm.h>
#include <nvgpu/io.h>
#include <nvgpu/soc.h>
#include "pmu_ga10b.h"
#include <nvgpu/hw/ga10b/hw_pwr_ga10b.h>

bool ga10b_is_pmu_supported(struct gk20a *g)
{
        return nvgpu_platform_is_simulation(g) ? false : true;
}

u32 ga10b_pmu_falcon2_base_addr(void)
{
        return pwr_falcon2_pwr_base_r();
}

u32 ga10b_pmu_get_irqmask(struct gk20a *g)
{
        u32 mask = 0U;

        if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
                nvgpu_pmu_dbg(g, "RISCV core INTR");
                mask = nvgpu_readl(g, pwr_riscv_irqmask_r());
                mask &= nvgpu_readl(g, pwr_riscv_irqdest_r());
        } else {
                nvgpu_pmu_dbg(g, "Falcon core INTR");
                mask = nvgpu_readl(g, pwr_falcon_irqmask_r());
                mask &= nvgpu_readl(g, pwr_falcon_irqdest_r());
        }

        return mask;
}

static int ga10b_pmu_ns_falcon_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu,
                u32 args_offset)
{
        struct mm_gk20a *mm = &g->mm;
        struct nvgpu_firmware *fw = NULL;
        struct pmu_ucode_desc_v1 *desc = NULL;
        u32 addr_code_lo, addr_data_lo, addr_load_lo;
        u32 addr_code_hi, addr_data_hi;
        u32 blocks, i;
        u32 inst_block_ptr;
        int err;

        nvgpu_log_fn(g, " ");

        fw = nvgpu_pmu_fw_desc_desc(g, pmu);
        desc = (struct pmu_ucode_desc_v1 *)(void *)fw->data;

        nvgpu_writel(g, pwr_falcon_itfen_r(),
                nvgpu_readl(g, pwr_falcon_itfen_r()) |
                pwr_falcon_itfen_ctxen_enable_f());

        inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);
        nvgpu_writel(g, pwr_pmu_new_instblk_r(),
                pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
                pwr_pmu_new_instblk_valid_f(1) |
                (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
                        pwr_pmu_new_instblk_target_sys_coh_f() :
                        pwr_pmu_new_instblk_target_sys_ncoh_f()));

        /* Set up an auto-incrementing DMEM write starting at offset 0. */
        nvgpu_writel(g, pwr_falcon_dmemc_r(0),
                pwr_falcon_dmemc_offs_f(0) |
                pwr_falcon_dmemc_blk_f(0) |
                pwr_falcon_dmemc_aincw_f(1));

        addr_code_lo = u64_lo32(right_shift_8bits((pmu->fw->ucode.gpu_va +
                desc->app_start_offset +
                desc->app_resident_code_offset)));
        addr_code_hi = u64_hi32(right_shift_8bits((pmu->fw->ucode.gpu_va +
                desc->app_start_offset +
                desc->app_resident_code_offset)));
        addr_data_lo = u64_lo32(right_shift_8bits((pmu->fw->ucode.gpu_va +
                desc->app_start_offset +
                desc->app_resident_data_offset)));
        addr_data_hi = u64_hi32(right_shift_8bits((pmu->fw->ucode.gpu_va +
                desc->app_start_offset +
                desc->app_resident_data_offset)));
        addr_load_lo = u64_lo32(right_shift_8bits((pmu->fw->ucode.gpu_va +
                desc->bootloader_start_offset)));

        /*
         * Write the NS bootloader argument block into DMEM in the layout
         * the bootloader expects (reserved words, DMA index, code/data
         * addresses and sizes, IMEM entry point, args offset).
         */
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), GK20A_PMU_DMAIDX_UCODE);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), left_shift_8bits(addr_code_lo));
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), addr_code_hi);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_offset);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_code_size);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_0);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), desc->app_imem_entry);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), left_shift_8bits(addr_data_lo));
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), addr_data_hi);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), DMEM_DATA_1);
        nvgpu_writel(g, pwr_falcon_dmemd_r(0), args_offset);

        g->ops.pmu.write_dmatrfbase(g,
                addr_load_lo -
                (right_shift_8bits(desc->bootloader_imem_offset)));

        /* DMA the bootloader into IMEM in 256-byte blocks. */
        blocks = right_shift_8bits(((desc->bootloader_size + U8_MAX) & ~U8_MAX));
        for (i = DMA_OFFSET_START; i < blocks; i++) {
                nvgpu_writel(g, pwr_falcon_dmatrfmoffs_r(),
                        desc->bootloader_imem_offset + left_shift_8bits(i));
                nvgpu_writel(g, pwr_falcon_dmatrffboffs_r(),
                        desc->bootloader_imem_offset + left_shift_8bits(i));
                nvgpu_writel(g, pwr_falcon_dmatrfcmd_r(),
                        pwr_falcon_dmatrfcmd_imem_f(1) |
                        pwr_falcon_dmatrfcmd_write_f(0) |
                        pwr_falcon_dmatrfcmd_size_f(6) |
                        pwr_falcon_dmatrfcmd_ctxdma_f(GK20A_PMU_DMAIDX_UCODE));
        }

        err = nvgpu_falcon_bootstrap(pmu->flcn, desc->bootloader_entry_point);

        nvgpu_writel(g, pwr_falcon_os_r(), desc->app_version);

        return err;
}

u32 ga10b_pmu_get_inst_block_config(struct gk20a *g)
{
        struct mm_gk20a *mm = &g->mm;
        u32 inst_block_ptr = 0;

        inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block);

        return (pwr_pmu_new_instblk_ptr_f(inst_block_ptr) |
                pwr_pmu_new_instblk_valid_f(1) |
                (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
                        pwr_pmu_new_instblk_target_sys_coh_f() :
                        pwr_pmu_new_instblk_target_sys_ncoh_f()));
}

static int ga10b_pmu_ns_nvriscv_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu,
                u32 args_offset)
{
        struct falcon_next_core_ucode_desc *desc;
        struct pmu_rtos_fw *rtos_fw = g->pmu->fw;
        u64 fmc_code_addr = 0;
        u64 fmc_data_addr = 0;
        u64 manifest_addr = 0;

        desc = (struct falcon_next_core_ucode_desc *)(void *)
                rtos_fw->fw_desc->data;

        /* FMC code/data and manifest addresses, in 256-byte units. */
        fmc_code_addr = right_shift_8bits((nvgpu_mem_get_addr(g,
                &pmu->fw->ucode) + desc->monitor_code_offset));
        fmc_data_addr = right_shift_8bits((nvgpu_mem_get_addr(g,
                &pmu->fw->ucode) + desc->monitor_data_offset));
        manifest_addr = right_shift_8bits((nvgpu_mem_get_addr(g,
                &pmu->fw->ucode) + desc->manifest_offset));

        g->ops.falcon.brom_config(pmu->flcn, fmc_code_addr, fmc_data_addr,
                manifest_addr);
        g->ops.falcon.bootstrap(pmu->flcn, 0U);

        return 0;
}

int ga10b_pmu_ns_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu,
                u32 args_offset)
{
        int err = 0;

        if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
                err = ga10b_pmu_ns_nvriscv_bootstrap(g, pmu, args_offset);
        } else {
                err = ga10b_pmu_ns_falcon_bootstrap(g, pmu, args_offset);
        }

        return err;
}

void ga10b_pmu_dump_elpg_stats(struct nvgpu_pmu *pmu)
{
        struct gk20a *g = pmu->g;

        nvgpu_pmu_dbg(g, "pwr_pmu_idle_mask_supp_r(3): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_mask_supp_r(3)));
        nvgpu_pmu_dbg(g, "pwr_pmu_idle_mask_1_supp_r(3): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_mask_1_supp_r(3)));
        nvgpu_pmu_dbg(g, "pwr_pmu_idle_ctrl_supp_r(3): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_ctrl_supp_r(3)));
        nvgpu_pmu_dbg(g, "pwr_pmu_pg_idle_cnt_r(0): 0x%08x",
                nvgpu_readl(g, pwr_pmu_pg_idle_cnt_r(0)));
        nvgpu_pmu_dbg(g, "pwr_pmu_pg_intren_r(0): 0x%08x",
                nvgpu_readl(g, pwr_pmu_pg_intren_r(0)));
        nvgpu_pmu_dbg(g, "pwr_pmu_idle_count_r(3): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_count_r(3)));
        nvgpu_pmu_dbg(g, "pwr_pmu_idle_count_r(4): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_count_r(4)));
        nvgpu_pmu_dbg(g, "pwr_pmu_idle_count_r(7): 0x%08x",
                nvgpu_readl(g, pwr_pmu_idle_count_r(7)));
}

void ga10b_pmu_init_perfmon_counter(struct gk20a *g)
{
        u32 data;

        /* Use counter #3 for GR and CE2 busy cycles. */
        nvgpu_writel(g, pwr_pmu_idle_mask_r(IDLE_COUNTER_3),
                pwr_pmu_idle_mask_gr_enabled_f() |
                pwr_pmu_idle_mask_ce_2_enabled_f());

        /* Disable idle filtering for counters #3 and #6. */
        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_3));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_busy_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_3), data);

        /* Use counter #6 for total cycles. */
        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_6));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_always_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_6), data);

        /*
         * We don't want to disturb counters #3 and #6, which are used by
         * perfmon, so we also wire up counters #1 and #2 to expose raw
         * counter readings.
         */
        nvgpu_writel(g, pwr_pmu_idle_mask_r(IDLE_COUNTER_1),
                pwr_pmu_idle_mask_gr_enabled_f() |
                pwr_pmu_idle_mask_ce_2_enabled_f());
        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_1));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_busy_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_1), data);

        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_2));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_always_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_2), data);

        /*
         * Use counters #4 and #0 for perfmon to log busy cycles and total
         * cycles. A counter #0 overflow sets the PMU idle interrupt status
         * bit.
         */
        nvgpu_writel(g, pwr_pmu_idle_intr_r(),
                pwr_pmu_idle_intr_en_f(0));
        nvgpu_writel(g, pwr_pmu_idle_threshold_r(IDLE_COUNTER_0),
                pwr_pmu_idle_threshold_value_f(PMU_IDLE_THRESHOLD_V));
        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_0));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_always_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_0), data);

        nvgpu_writel(g, pwr_pmu_idle_mask_r(IDLE_COUNTER_4),
                pwr_pmu_idle_mask_gr_enabled_f() |
                pwr_pmu_idle_mask_ce_2_enabled_f());
        data = nvgpu_readl(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_4));
        data = set_field(data, pwr_pmu_idle_ctrl_value_m() |
                pwr_pmu_idle_ctrl_filter_m(),
                pwr_pmu_idle_ctrl_value_busy_f() |
                pwr_pmu_idle_ctrl_filter_disabled_f());
        nvgpu_writel(g, pwr_pmu_idle_ctrl_r(IDLE_COUNTER_4), data);

        nvgpu_writel(g, pwr_pmu_idle_count_r(IDLE_COUNTER_0),
                pwr_pmu_idle_count_reset_f(1));
        nvgpu_writel(g, pwr_pmu_idle_count_r(IDLE_COUNTER_4),
                pwr_pmu_idle_count_reset_f(1));
        nvgpu_writel(g, pwr_pmu_idle_intr_status_r(),
                pwr_pmu_idle_intr_status_intr_f(1));
}

u32 ga10b_pmu_read_idle_counter(struct gk20a *g, u32 counter_id)
{
        return pwr_pmu_idle_count_value_v(
                nvgpu_readl(g, pwr_pmu_idle_count_r(counter_id)));
}

void ga10b_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id)
{
        nvgpu_writel(g, pwr_pmu_idle_count_r(counter_id),
                pwr_pmu_idle_count_reset_f(1));
}

bool ga10b_pmu_is_debug_mode_en(struct gk20a *g)
{
        u32 ctl_stat = nvgpu_readl(g, pwr_falcon_hwcfg2_r());

        if (pwr_falcon_hwcfg2_dbgmode_v(ctl_stat) ==
                        pwr_falcon_hwcfg2_dbgmode_enable_v()) {
                nvgpu_info(g, "DEBUG MODE");
                return true;
        } else {
                nvgpu_info(g, "PROD MODE");
                return false;
        }
}

void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr)
{
        struct nvgpu_pmu *pmu = g->pmu;
        int err = 0;

        /*
         * SWGEN1 is raised by the ucode after it updates the debug buffer
         * write offset in the queue; drain the buffer and print the new
         * messages onto the console.
         */
        if ((intr & pwr_falcon_irqstat_swgen1_true_f()) != 0U) {
#ifdef CONFIG_NVGPU_FALCON_DEBUG
                err = nvgpu_falcon_dbg_buf_display(pmu->flcn);
                if (err != 0) {
                        nvgpu_err(g,
                                "nvgpu_falcon_dbg_buf_display failed err=%d",
                                err);
                }
#endif
        }
}