@@ -36,48 +36,130 @@
#include "hal/fb/vab/vab_ga10b.h"
#include <nvgpu/hw/ga10b/hw_fb_ga10b.h>

#define GA10B_VAB_ENTRY 512U /* = vab_size * 2 */
#define GA10B_VAB_WRITE_PACKETS 8U /* = num_range_checkers */
#define GA10B_VAB_WRITE_PACKET_DWORDS 8U /* 512 bytes / 8 packets = 64 bytes = 16 words = 8 double words */
#define GA10B_VAB_WRITE_PACKET_ACCESS_DWORDS 4U

int ga10b_fb_vab_init(struct gk20a *g)
{
	/* - allocate buffer and map it in bar2
	 * - single entry is 2K bits i.e. 256 bytes
	 * - write buffer address to NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_BUFFER_LO_ADDR
	 *   and NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_BUFFER_HI_ADDR
	 * - write NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_BUFFER_SIZE_VAL
	 */
	/*
	 * On ga10b, there's only one VAB buffer which covers 2048 bits and has
	 * another 2048 bits of meta data, for a total of 512B.
	 */
	int err = 0;
	size_t num_vab_entries = 2U;
	struct vm_gk20a *vm = g->mm.bar2.vm;
	struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
	u64 buf_addr = 0ULL;
#define GA10B_VAB_ENTRY_SIZE 512UL
#define GA10B_VAB_NUM_ENTRIES 1UL
#define GA10B_VAB_DUMP_SIZE (GA10B_VAB_ENTRY_SIZE * GA10B_VAB_NUM_ENTRIES)

	if (!nvgpu_mem_is_valid(&g->mm.vab.buffer)) {
#define GA10B_VAB_NUM_PACKETS 8UL

		err = nvgpu_dma_alloc_map_sys(vm,
			num_vab_entries * GA10B_VAB_ENTRY, vab_buf);
		if (err != 0) {
			nvgpu_err(g, "Error in vab buffer alloc in bar2 vm ");
			return -ENOMEM;
		}
	}
	buf_addr = ((u64)(uintptr_t)vab_buf->gpu_va);
	nvgpu_log(g, gpu_dbg_vab, "buf_addr 0x%llx", buf_addr);
/* Each packet has 64 bytes (32 bytes for access bits and 32 bytes meta) */
#define GA10B_VAB_PACKET_SIZE_IN_BYTES 64UL

/* The access bits are in the first 32 bytes */
#define GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES 32UL

/* Number of bytes written to user */
#define GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES \
	(GA10B_VAB_NUM_PACKETS \
	 * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES)
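
/*
 * Layout implied by the macros above: the dump buffer holds
 * GA10B_VAB_NUM_PACKETS * GA10B_VAB_PACKET_SIZE_IN_BYTES = 8 * 64 = 512 bytes.
 * Only the first 32 bytes of each packet carry access bits, so at most
 * 8 * 32 = 256 bytes are copied out to the user buffer.
 */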

static void ga10b_fb_val_release_registers(
	struct gk20a *g)
{
	u32 vab_buf_size_reg = 0U;
	u32 vab_reg = 0U;

	vab_buf_size_reg = nvgpu_readl(g,
		fb_mmu_vidmem_access_bit_buffer_size_r());
	vab_buf_size_reg = set_field(vab_buf_size_reg,
		fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
		fb_mmu_vidmem_access_bit_buffer_size_enable_f(
			fb_mmu_vidmem_access_bit_buffer_size_enable_false_v()));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(), vab_buf_size_reg);

	vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
	vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
		fb_mmu_vidmem_access_bit_enable_f(
			fb_mmu_vidmem_access_bit_enable_false_v()));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);

	/*
	 * - Disable VAB in GPC
	 */
	g->ops.gr.vab_release(g, vab_reg);
}
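
/*
 * Program the VAB buffer registers: buffer GPU VA into BUFFER_HI/LO and the
 * entry count into BUFFER_SIZE_VAL. Called from ga10b_fb_vab_init() and
 * ga10b_fb_vab_recover().
 */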
static void ga10b_fb_vab_init_registers(
	struct gk20a *g, u64 buf_addr)
{
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_hi_r(),
		fb_mmu_vidmem_access_bit_buffer_hi_addr_f(u64_hi32(buf_addr)));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_lo_r(),
		(fb_mmu_vidmem_access_bit_buffer_lo_addr_m() &
			u64_lo32(buf_addr)));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
		fb_mmu_vidmem_access_bit_buffer_size_val_f(num_vab_entries));
		fb_mmu_vidmem_access_bit_buffer_size_val_f(GA10B_VAB_NUM_ENTRIES));
}

int ga10b_fb_vab_init(struct gk20a *g)
{
	/*
	 * Allocate a buffer for VAB buffers (512 bytes total) and map it in bar2.
	 * Update the hw register to specify the address and number of buffers.
	 */
	int err = 0;
	struct vm_gk20a *vm = g->mm.bar2.vm;
	struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
	u64 buf_addr = 0ULL;

	if (!nvgpu_mem_is_valid(&g->mm.vab.buffer)) {
		err = nvgpu_dma_alloc_map_sys(vm,
			GA10B_VAB_ENTRY_SIZE * GA10B_VAB_NUM_ENTRIES, vab_buf);
		if (err != 0) {
			nvgpu_err(g, "Error in vab buffer alloc in bar2 vm ");
			return -ENOMEM;
		}
	}
	buf_addr = vab_buf->gpu_va;
	nvgpu_log(g, gpu_dbg_vab, "buf_addr 0x%llx", buf_addr);

	ga10b_fb_vab_init_registers(g, buf_addr);

	return 0;
}
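
/*
 * Re-arm VAB after an MMU VAB_ERROR fault: disable it, reprogram the buffer
 * registers if the dump buffer is still valid, then re-enable both the buffer
 * and the access-bit tracking on the FB and GPC (GR) sides.
 */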
void ga10b_fb_vab_recover(struct gk20a *g)
{
	/*
	 * This function is called while recovering from an MMU VAB_ERROR fault.
	 * It must not perform any operations which may block.
	 */
	struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
	u32 vab_buf_size_reg = 0;
	u32 vab_reg = 0;

	ga10b_fb_val_release_registers(g);

	if (nvgpu_mem_is_valid(vab_buf)) {
		u64 buf_addr = vab_buf->gpu_va;
		ga10b_fb_vab_init_registers(g, buf_addr);
	}

	/* Re-enable */
	vab_buf_size_reg = nvgpu_readl(g,
		fb_mmu_vidmem_access_bit_buffer_size_r());
	vab_buf_size_reg = set_field(vab_buf_size_reg,
		fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
		fb_mmu_vidmem_access_bit_buffer_size_enable_f(
			fb_mmu_vidmem_access_bit_buffer_size_enable_true_v()));

	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
		vab_buf_size_reg);

	vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
	vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
		fb_mmu_vidmem_access_bit_enable_f(
			fb_mmu_vidmem_access_bit_enable_true_v()));

	nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);

	g->ops.gr.vab_recover(g, vab_reg);
}
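
/*
 * Each range checker tracks one address range. granularity_shift is the log2
 * of the tracking granularity; the base value 16 corresponds to 64KB.
 */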
static int ga10b_fb_vab_config_address_range(struct gk20a *g,
	u32 num_range_checkers,
	struct nvgpu_vab_range_checker *vab_range_checker)

@@ -94,7 +176,6 @@ static int ga10b_fb_vab_config_address_range(struct gk20a *g,
	 * check range address is not in VPR
	 */
	u32 i = 0U;
	u32 max_range_checkers = fb_mmu_vidmem_access_bit_num_range_checker_v();
	u32 granularity_shift_bits_base = 16U; /* log2(64KB) */
	u32 granularity_shift_bits = 0U;
	int err = 0U;

@@ -104,8 +185,6 @@ static int ga10b_fb_vab_config_address_range(struct gk20a *g,
	g->mm.vab.user_num_range_checkers = num_range_checkers;
	nvgpu_log(g, gpu_dbg_vab, "num_range_checkers %u", num_range_checkers);

	nvgpu_assert(num_range_checkers <= max_range_checkers);

	for (i = 0U; i < num_range_checkers; i++) {
		if (vab_range_checker[i].granularity_shift <
			granularity_shift_bits_base) {

@@ -143,6 +222,11 @@ int ga10b_fb_vab_reserve(struct gk20a *g, u32 vab_mode, u32 num_range_checkers,

	nvgpu_log_fn(g, " ");

	if (num_range_checkers > fb_mmu_vidmem_access_bit_num_range_checker_v()) {
		nvgpu_err(g, "VAB range checker config failed");
		return -EINVAL;
	}

	err = ga10b_fb_vab_config_address_range(g, num_range_checkers,
		vab_range_checker);
	if (err != 0) {

@@ -206,7 +290,56 @@ fail:
	return err;
}
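
/*
 * Request a VAB dump: set DUMP_TRIGGER and poll with exponential back-off
 * until hardware clears it again, which indicates the dump request has
 * completed.
 */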
int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u64 *user_buf,
static int ga10b_fb_vab_request_dump(struct gk20a *g)
{
	struct nvgpu_timeout timeout;
	u32 delay = POLL_DELAY_MIN_US;
	u32 vab_dump_reg;
	u32 trigger_set;
	u32 trigger_reset;

	/* Set trigger to start vab dump */
	trigger_set = fb_mmu_vidmem_access_bit_dump_trigger_f(
		fb_mmu_vidmem_access_bit_dump_trigger_true_v());
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_dump_r(), trigger_set);

	/* Wait for trigger to go down */
	trigger_reset = fb_mmu_vidmem_access_bit_dump_trigger_f(
		fb_mmu_vidmem_access_bit_dump_trigger_false_v());
	nvgpu_timeout_init_cpu_timer(g, &timeout, 1000U);
	do {
		vab_dump_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_dump_r());
		nvgpu_log(g, gpu_dbg_vab, "vab_dump_reg 0x%x", vab_dump_reg);
		if (fb_mmu_vidmem_access_bit_dump_trigger_v(vab_dump_reg) == trigger_reset) {
			return 0;
		}
		nvgpu_usleep_range(delay, delay * 2U);
		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
	} while (nvgpu_timeout_expired(&timeout) == 0);
	return -ETIMEDOUT;
}
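
/*
 * Hardware sets the MSB of the last 4-byte word of a 64-byte write packet
 * once that packet has been dumped. Poll for that bit here; the caller
 * clears it again after copying the packet out.
 */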
static int ga10b_fb_vab_query_valid_bit(struct gk20a *g, struct nvgpu_mem *vab_buf,
	u64 valid_offset_in_bytes, u32 *out_valid_wr)
{
	struct nvgpu_timeout timeout;
	u32 delay = POLL_DELAY_MIN_US;
	u32 valid_wr = 0;
	nvgpu_timeout_init_cpu_timer(g, &timeout, 1000U);
	do {
		nvgpu_mem_rd_n(g, vab_buf, valid_offset_in_bytes, &valid_wr, sizeof(valid_wr));
		if ((valid_wr >> 31U) == 1U) {
			*out_valid_wr = valid_wr;
			return 0;
		}
		nvgpu_usleep_range(delay, delay * 2U);
		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);
	} while (nvgpu_timeout_expired(&timeout) == 0);
	nvgpu_err(g, "VAB write bit not valid");
	return -ETIMEDOUT;
}
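
/*
 * Dump flow: trigger a VAB dump, wait for each packet's valid bit, copy the
 * first 32 access-bit bytes of every packet into user_buf, then clear all
 * valid bits so the next dump can proceed.
 */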
int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u8 *user_buf,
	u64 user_buf_size)
{
	/*
@@ -214,139 +347,91 @@ int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u64 *user_buf,
	 * poll NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_DUMP_TRIGGER to be cleared
	 * clear what? buffer or access bits or buffer_put_ptr
	 */
	const u64 rd_wr_granularity_size = 4ULL;
	int err = 0;
	struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
	u64 buffer_offset = 0ULL;
	u64 req_buf_size = 0U;
	u32 i = 0U, j = 0U;
	u32 user_dword_offset = 0U;
	u32 user_buf_dwords = 0U;
	u32 vab_size = 0U;
	u32 vab_dump_reg = 0U;
	u64 i = 0U;
	u32 vab_put_ptr = 0U;
	u32 delay = POLL_DELAY_MIN_US;
	struct nvgpu_timeout timeout;
	u32 max_range_checkers = fb_mmu_vidmem_access_bit_num_range_checker_v();
	u32 trigger_set = fb_mmu_vidmem_access_bit_dump_trigger_f(
		fb_mmu_vidmem_access_bit_dump_trigger_true_v());
	u32 trigger_reset = fb_mmu_vidmem_access_bit_dump_trigger_f(
		fb_mmu_vidmem_access_bit_dump_trigger_false_v());
	u64 *wr_pkt = nvgpu_kzalloc(g, nvgpu_safe_mult_u32(sizeof(u64),
		GA10B_VAB_WRITE_PACKET_DWORDS)); /* 64B write packet */
	u32 vab_put_ptr_value = 0U;
	u32 valid_wr = 0U;
	u32 valid_mask = 0x80000000U;
	u64 valid_offset = 0ULL;
	u64 vab_offset = 0ULL;
	u64 valid_offset_in_bytes = 0ULL;

	if ((user_buf_size % rd_wr_granularity_size) != 0UL) {
		/* Restriction comes from the rd_n/wr_n operations */
		nvgpu_err(g, "user_buf size must be 4-byte aligned.");
		return -EINVAL;
	}

	if (user_buf_size < GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES) {
		nvgpu_err(g, "user_buf size must be at least %lu bytes. Given: %llu",
			GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES, user_buf_size);
		return -EINVAL;
	}
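
	/*
	 * user_buf can now hold GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES,
	 * i.e. 8 packets * 32 access-bit bytes = 256 bytes.
	 */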

	/* Get buffer address */
	vab_put_ptr = nvgpu_readl(g, fb_mmu_vidmem_access_bit_buffer_put_r());

	vab_put_ptr_value = fb_mmu_vidmem_access_bit_buffer_put_ptr_v(vab_put_ptr);
	nvgpu_log(g, gpu_dbg_vab, "vab_put_ptr 0x%x", vab_put_ptr);

	buffer_offset = U64(nvgpu_safe_mult_u32(
		fb_mmu_vidmem_access_bit_buffer_put_ptr_v(vab_put_ptr),
		GA10B_VAB_ENTRY));
	nvgpu_log(g, gpu_dbg_vab, "buffer_offset 0x%llx", buffer_offset);

	vab_size = fb_mmu_vidmem_access_bit_size_v(nvgpu_readl(g,
		fb_mmu_vidmem_access_bit_r()));
	/* 1024/8 bytes * 2^vab_size */
	req_buf_size = nvgpu_safe_mult_u64(128ULL, (1ULL << vab_size));
	/* buffer size will correspond to user range checker count */
	req_buf_size = (req_buf_size/max_range_checkers) *
		g->mm.vab.user_num_range_checkers;

	nvgpu_assert(user_buf_size >= req_buf_size);

	/* bytes to dwords */
	user_buf_dwords = user_buf_size/8;

	/* Set trigger to start vab dump */
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_dump_r(), trigger_set);

	vab_dump_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_dump_r());
	nvgpu_log(g, gpu_dbg_vab, "vab_dump_reg 0x%x", vab_dump_reg);

	nvgpu_timeout_init_cpu_timer(g, &timeout, 1000U);

	/* Check if trigger is cleared, i.e. vab bits collection is complete */
	do {
		nvgpu_usleep_range(delay, delay * 2U);
		delay = min_t(u32, delay << 1, POLL_DELAY_MAX_US);

		vab_dump_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_dump_r());
		nvgpu_log(g, gpu_dbg_vab, "vab_dump_reg 0x%x", vab_dump_reg);
	} while ((fb_mmu_vidmem_access_bit_dump_trigger_v(vab_dump_reg) !=
			trigger_reset) &&
		(nvgpu_timeout_expired(&timeout) == 0));

	user_dword_offset = 0U;

	for (i = 0U; i < GA10B_VAB_WRITE_PACKETS; i++) {
		/* Poll valid bit for write packet i */
		valid_offset = (buffer_offset / 4ULL) +
			((i+1) * (GA10B_VAB_WRITE_PACKET_DWORDS * 2)) - 1;
		nvgpu_log(g, gpu_dbg_vab, "Read valid bit at 0x%llx offset",
			valid_offset);

		do {
			valid_wr = nvgpu_mem_rd32(g, vab_buf, valid_offset);
		} while (valid_wr != valid_mask);

		/* Read VAB bits */
		vab_offset = buffer_offset +
			(i * GA10B_VAB_WRITE_PACKET_DWORDS * 8U);
		nvgpu_mem_rd_n(g, vab_buf, vab_offset, (void *)wr_pkt,
			GA10B_VAB_WRITE_PACKET_DWORDS * 8U);

		/* Copy and print access bits to user buffer */
		for (j = 0U; j < GA10B_VAB_WRITE_PACKET_DWORDS; j++) {

			if ((user_dword_offset < user_buf_dwords) &&
				(j < GA10B_VAB_WRITE_PACKET_ACCESS_DWORDS)) {
				user_buf[user_dword_offset++] = wr_pkt[j];
			}
			nvgpu_log(g, gpu_dbg_vab, "wr_pkt %d: 0x%016llx",
				j, wr_pkt[j]);
	if (vab_put_ptr_value != 0U) {
		nvgpu_err(g, "unexpected vab_put_ptr value: %u", vab_put_ptr_value);
		return -EINVAL;
	}

		/* Clear MSB valid bit to indicate packet read complete */
		nvgpu_mem_wr32(g, vab_buf, valid_offset,
			(valid_wr & ~valid_mask));
	/* Dump VAB */
	err = ga10b_fb_vab_request_dump(g);
	if (err < 0) {
		nvgpu_err(g, "VAB collection failed");
		goto done;
	}

	nvgpu_kfree(g, wr_pkt);
	return 0;
	for (i = 0U; i < GA10B_VAB_NUM_PACKETS; i++) {
		/* The valid bit is the very top bit of this packet's 64 bytes */
		valid_offset_in_bytes =
			(i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES - rd_wr_granularity_size;
		/* Poll the bit to see if this packet's results are valid */
		err = ga10b_fb_vab_query_valid_bit(g, vab_buf, valid_offset_in_bytes, &valid_wr);
		if (err == 0) {
			/*
			 * Read VAB bits. Each packet is 64 bytes, but only 32 are access bytes.
			 * User expects a contiguous dump of access bits, so some extra
			 * calculations are necessary.
			 */
			const u64 num_bytes_to_copy = GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
			/* Determine source buffer */
			const u64 vab_offset = i * GA10B_VAB_PACKET_SIZE_IN_BYTES;
			/* Determine destination va */
			u8 *user_buf_destination = user_buf + i * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
			nvgpu_mem_rd_n(g, vab_buf, vab_offset, user_buf_destination, num_bytes_to_copy);
		} else {
			nvgpu_err(g, "Reading packet %llu failed", i);
			goto clear_valid_bits;
		}
	}
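
	/*
	 * user_buf now holds GA10B_VAB_NUM_PACKETS contiguous 32-byte chunks of
	 * access bits (256 bytes total); the metadata half of each packet is not
	 * copied out.
	 */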
	err = 0;

clear_valid_bits:
	/*
	 * Clear MSB valid bits to indicate packets were read.
	 * All bits need to be cleared even if querying failed for any of the bits.
	 */
	valid_wr = 0;
	for (i = 0U; i < GA10B_VAB_NUM_PACKETS; i++) {
		valid_offset_in_bytes =
			(i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES - rd_wr_granularity_size;
		nvgpu_mem_wr_n(g, vab_buf, valid_offset_in_bytes, &valid_wr, sizeof(valid_wr));
	}

done:
	return err;
}
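
/*
 * Tear down VAB: disable the dump buffer and access-bit tracking on the FB
 * side and release the GPC side via g->ops.gr.vab_release().
 */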
int ga10b_fb_vab_release(struct gk20a *g)
{
	/*
	 * - reset NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_BUFFER_SIZE_ENABLE
	 * - reset NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_ENABLE
	 */
	u32 vab_buf_size_reg = 0U;
	u32 vab_reg = 0U;

	nvgpu_log_fn(g, " ");

	vab_buf_size_reg = nvgpu_readl(g,
		fb_mmu_vidmem_access_bit_buffer_size_r());
	vab_buf_size_reg = set_field(vab_buf_size_reg,
		fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
		fb_mmu_vidmem_access_bit_buffer_size_enable_f(
			fb_mmu_vidmem_access_bit_buffer_size_enable_false_v()));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(), vab_buf_size_reg);

	vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
	vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
		fb_mmu_vidmem_access_bit_enable_f(
			fb_mmu_vidmem_access_bit_enable_false_v()));
	nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);

	/*
	 * - Disable VAB in GPC
	 */
	g->ops.gr.vab_release(g, vab_reg);
	ga10b_fb_val_release_registers(g);
	return 0;
}