gpu: nvgpu: modify vab implementation

Currently, the VAB implementation uses a fixed number of access bits. This
value can instead be computed from the fb_mmu_vidmem_access_bit_size_f()
register field.
- Modify the VAB implementation to compute the number of access bits at
runtime (see the sketch after this list).
- Modify the nvgpu_vab structure to hold the VAB entry size corresponding
to the number of access bits.
- The information in the nvgpu_vab structure relates to the GPU rather than
to the MM layer, so move the nvgpu_vab element from the mm_gk20a struct to
the gk20a struct.
- Add fb.set_vab_buffer_address to update the VAB buffer address in the HW
registers.
- Rename the gr.vab_init HAL to gr.vab_reserve to avoid confusion about
when this HAL should be used.
- Replace gr.vab_release and gr.vab_recover with a gr.vab_configure HAL.
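
The size computation works as follows: the MMU reports a size exponent, the
access-bit count is 1024 << exponent (i.e. 128 << exponent bytes), and each
dump entry is twice that many bytes because every 64-byte packet pairs 32
bytes of access bits with 32 bytes of metadata. A standalone sketch of this
math follows; the helper name and main() are illustrative only and not part
of the change.

/*
 * Sketch only: mirrors the entry-size math added to ga10b_fb_vab_init().
 * "exponent" stands for the value read via fb_mmu_vidmem_access_bit_size_v().
 */
#include <stdio.h>

static unsigned long compute_vab_entry_size(unsigned int exponent)
{
    /* VAB access bits  = 1024 * (1 << exponent) */
    /* VAB access bytes =  128 * (1 << exponent) */
    unsigned long access_bytes = 128UL * (1UL << exponent);

    /* Each 64B packet = 32B access bits + 32B metadata, so the
     * dump entry is twice the access-bit bytes. */
    return access_bytes * 2UL;
}

int main(void)
{
    /* exponent 1 reproduces the previously hard-coded 512B ga10b entry:
     * 2048 access bits (256B) + 2048 metadata bits (256B). */
    printf("entry size: %lu bytes\n", compute_vab_entry_size(1U));
    return 0;
}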

Bug 3465734

Change-Id: I1b67bfa9be7728be5bda978c6bb87b196d55ab65
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2659467
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Martin Radev <mradev@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
GVS: Gerrit_Virtual_Submit
11 changed files with 171 additions and 160 deletions

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -36,15 +36,8 @@
 #include "hal/fb/vab/vab_ga10b.h"
 #include <nvgpu/hw/ga10b/hw_fb_ga10b.h>
 
-/*
- * On ga10b, there's only one VAB buffer which covers 2048 bits and has another
- * 2048 bits of meta data, thus being a total of 512B.
- */
-#define GA10B_VAB_ENTRY_SIZE 512UL
+/* Currently, single VAB entry should suffice */
 #define GA10B_VAB_NUM_ENTRIES 1UL
-#define GA10B_VAB_DUMP_SIZE (GA10B_VAB_ENTRY_SIZE * GA10B_VAB_NUM_ENTRIES)
-#define GA10B_VAB_NUM_PACKETS 8UL
 
 /* Each packet has 64 bytes (32 bytes for access bits and 32 bytes meta) */
 #define GA10B_VAB_PACKET_SIZE_IN_BYTES 64UL
@@ -52,39 +45,56 @@
 /* The access bits are in the first 32 bytes */
 #define GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES 32UL
 
-/* Number of bytes written to user */
-#define GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES \
-    (GA10B_VAB_NUM_PACKETS \
-     * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES)
-
-static void ga10b_fb_val_release_registers(
-        struct gk20a *g)
+int ga10b_fb_vab_init(struct gk20a *g)
 {
-    u32 vab_buf_size_reg = 0U;
+    int err = 0;
+    struct vm_gk20a *vm = g->mm.bar2.vm;
+    struct nvgpu_vab *vab = &g->vab;
+    struct nvgpu_mem *vab_buf = &g->vab.buffer;
     u32 vab_reg = 0U;
+    u32 vab_size_exponent = 0U;
+    unsigned long vab_size_bytes = 0UL;
+    unsigned long vab_entry_size = 0UL;
 
-    vab_buf_size_reg = nvgpu_readl(g,
-        fb_mmu_vidmem_access_bit_buffer_size_r());
-    vab_buf_size_reg = set_field(vab_buf_size_reg,
-        fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
-        fb_mmu_vidmem_access_bit_buffer_size_enable_f(
-            fb_mmu_vidmem_access_bit_buffer_size_enable_false_v()));
-    nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(), vab_buf_size_reg);
-
+    /* Retrieve VAB access bits length */
     vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
-    vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
-        fb_mmu_vidmem_access_bit_enable_f(
-            fb_mmu_vidmem_access_bit_enable_false_v()));
-    nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);
+    vab_size_exponent = fb_mmu_vidmem_access_bit_size_v(vab_reg);
 
     /*
-     * - Disable VAB in GPC
+     * VAB access bits = 1024 * (0x1 << exponent)
+     * VAB access bytes = 128 * (0x1 << exponent)
      */
-    g->ops.gr.vab_release(g, vab_reg);
+    vab_size_bytes = 128UL * (0x1UL << vab_size_exponent);
+    nvgpu_log(g, gpu_dbg_vab, "vab access bytes %lu", vab_size_bytes);
+
+    /*
+     * VAB dump packets have fixed width of 64B
+     * Each packet contains 32B access bits and 32B meta data.
+     * Thus, total entry size is twice of the VAB access bits.
+     */
+    vab_entry_size = nvgpu_safe_mult_u32(vab_size_bytes, 2UL);
+    nvgpu_log(g, gpu_dbg_vab, "vab_entry_size 0x%lx", vab_entry_size);
+
+    vab->entry_size = vab_entry_size;
+    vab->num_entries = GA10B_VAB_NUM_ENTRIES;
+
+    if (!nvgpu_mem_is_valid(vab_buf)) {
+        /* Allocate memory for single VAB entry */
+        err = nvgpu_dma_alloc_map_sys(vm, nvgpu_safe_mult_u32(
+            vab->entry_size, vab->num_entries), vab_buf);
+        if (err != 0) {
+            nvgpu_err(g, "Error in vab buffer alloc in bar2 vm ");
+            return -ENOMEM;
+        }
+    }
+    nvgpu_log(g, gpu_dbg_vab, "buf_addr 0x%llx", vab_buf->gpu_va);
+
+    g->ops.fb.vab.set_vab_buffer_address(g, vab_buf->gpu_va);
+
+    return 0;
 }
 
-static void ga10b_fb_vab_init_registers(
-        struct gk20a *g, u64 buf_addr)
+void ga10b_fb_vab_set_vab_buffer_address(struct gk20a *g, u64 buf_addr)
 {
     nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_hi_r(),
         fb_mmu_vidmem_access_bit_buffer_hi_addr_f(u64_hi32(buf_addr)));
@@ -92,34 +102,47 @@ static void ga10b_fb_vab_init_registers(
         (fb_mmu_vidmem_access_bit_buffer_lo_addr_m() &
         u64_lo32(buf_addr)));
     nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
-        fb_mmu_vidmem_access_bit_buffer_size_val_f(GA10B_VAB_NUM_ENTRIES));
+        fb_mmu_vidmem_access_bit_buffer_size_val_f(
+            g->vab.num_entries));
 }
 
-int ga10b_fb_vab_init(struct gk20a *g)
+static void ga10b_fb_vab_enable(struct gk20a *g, bool enable)
 {
+    u32 vab_buf_size_reg = 0U;
+    u32 vab_reg = 0U;
+
+    vab_buf_size_reg = nvgpu_readl(g,
+        fb_mmu_vidmem_access_bit_buffer_size_r());
+    vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
+
+    if (enable) {
+        vab_buf_size_reg = set_field(vab_buf_size_reg,
+            fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
+            fb_mmu_vidmem_access_bit_buffer_size_enable_f(
+                fb_mmu_vidmem_access_bit_buffer_size_enable_true_v()));
+        vab_reg = set_field(vab_reg,
+            fb_mmu_vidmem_access_bit_enable_m(),
+            fb_mmu_vidmem_access_bit_enable_f(
+                fb_mmu_vidmem_access_bit_enable_true_v()));
+    } else {
+        vab_buf_size_reg = set_field(vab_buf_size_reg,
+            fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
+            fb_mmu_vidmem_access_bit_buffer_size_enable_f(
+                fb_mmu_vidmem_access_bit_buffer_size_enable_false_v()));
+        vab_reg = set_field(vab_reg,
+            fb_mmu_vidmem_access_bit_enable_m(),
+            fb_mmu_vidmem_access_bit_enable_f(
+                fb_mmu_vidmem_access_bit_enable_false_v()));
+    }
+
+    nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
+        vab_buf_size_reg);
+    nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);
+
     /*
-     * Allocate a buffer for VAB buffers (512 bytes total) and map it in bar2.
-     * Update the hw register to specify the address and number of buffers.
+     * - Configure VAB in GPC
      */
-    int err = 0;
-    struct vm_gk20a *vm = g->mm.bar2.vm;
-    struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
-    u64 buf_addr = 0ULL;
-
-    if (!nvgpu_mem_is_valid(&g->mm.vab.buffer)) {
-        err = nvgpu_dma_alloc_map_sys(vm,
-            GA10B_VAB_ENTRY_SIZE * GA10B_VAB_NUM_ENTRIES, vab_buf);
-        if (err != 0) {
-            nvgpu_err(g, "Error in vab buffer alloc in bar2 vm ");
-            return -ENOMEM;
-        }
-    }
-    buf_addr = vab_buf->gpu_va;
-    nvgpu_log(g, gpu_dbg_vab, "buf_addr 0x%llx", buf_addr);
-
-    ga10b_fb_vab_init_registers(g, buf_addr);
-    return 0;
+    g->ops.gr.vab_configure(g, vab_reg);
 }
 
 void ga10b_fb_vab_recover(struct gk20a *g)
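
For reference, the enable helper above is the usual read-modify-write around
set_field(); a standalone illustration with made-up register and mask values
(stand-ins for the generated fb_mmu_vidmem_access_bit_* accessors, not the
real nvgpu definitions):

/* Illustration only: set_field()-style read-modify-write. */
#include <stdio.h>
#include <stdint.h>

static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t value)
{
    return (reg & ~mask) | (value & mask);
}

int main(void)
{
    uint32_t reg = 0x00000f00U;   /* pretend current register contents */
    uint32_t enable_mask = 0x1U;  /* pretend ENABLE field mask */

    reg = set_field(reg, enable_mask, 0x1U);  /* enable */
    printf("enabled:  0x%08x\n", reg);
    reg = set_field(reg, enable_mask, 0x0U);  /* disable */
    printf("disabled: 0x%08x\n", reg);
    return 0;
}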
@@ -128,36 +151,16 @@ void ga10b_fb_vab_recover(struct gk20a *g)
      * This function is called while recovering from an MMU VAB_ERROR fault.
      * It must not perform any operations which may block.
      */
-    struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
-    u32 vab_buf_size_reg = 0;
-    u32 vab_reg = 0;
+    struct nvgpu_mem *vab_buf = &g->vab.buffer;
 
-    ga10b_fb_val_release_registers(g);
+    ga10b_fb_vab_enable(g, false);
 
     if (nvgpu_mem_is_valid(vab_buf)) {
-        u64 buf_addr = vab_buf->gpu_va;
-
-        ga10b_fb_vab_init_registers(g, buf_addr);
+        g->ops.fb.vab.set_vab_buffer_address(g, vab_buf->gpu_va);
     }
 
     /* Re-enable */
-    vab_buf_size_reg = nvgpu_readl(g,
-        fb_mmu_vidmem_access_bit_buffer_size_r());
-    vab_buf_size_reg = set_field(vab_buf_size_reg,
-        fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
-        fb_mmu_vidmem_access_bit_buffer_size_enable_f(
-            fb_mmu_vidmem_access_bit_buffer_size_enable_true_v()));
-    nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
-        vab_buf_size_reg);
-
-    vab_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_r());
-    vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
-        fb_mmu_vidmem_access_bit_enable_f(
-            fb_mmu_vidmem_access_bit_enable_true_v()));
-    nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);
-
-    g->ops.gr.vab_recover(g, vab_reg);
+    ga10b_fb_vab_enable(g, true);
 }
 
 static int ga10b_fb_vab_config_address_range(struct gk20a *g,
@@ -182,7 +185,7 @@ static int ga10b_fb_vab_config_address_range(struct gk20a *g,
     nvgpu_log_fn(g, " ");
 
-    g->mm.vab.user_num_range_checkers = num_range_checkers;
+    g->vab.user_num_range_checkers = num_range_checkers;
     nvgpu_log(g, gpu_dbg_vab, "num_range_checkers %u", num_range_checkers);
 
     for (i = 0U; i < num_range_checkers; i++) {
@@ -216,7 +219,6 @@ static int ga10b_fb_vab_config_address_range(struct gk20a *g,
 int ga10b_fb_vab_reserve(struct gk20a *g, u32 vab_mode, u32 num_range_checkers,
     struct nvgpu_vab_range_checker *vab_range_checker)
 {
-    u32 vab_buf_size_reg = 0U;
     u32 vab_reg = 0U;
     int err = 0U;
@@ -234,19 +236,6 @@ int ga10b_fb_vab_reserve(struct gk20a *g, u32 vab_mode, u32 num_range_checkers,
         goto fail;
     }
 
-    /*
-     * - set NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT_BUFFER_SIZE_ENABLE
-     */
-    vab_buf_size_reg = nvgpu_readl(g,
-        fb_mmu_vidmem_access_bit_buffer_size_r());
-    vab_buf_size_reg = set_field(vab_buf_size_reg,
-        fb_mmu_vidmem_access_bit_buffer_size_enable_m(),
-        fb_mmu_vidmem_access_bit_buffer_size_enable_f(
-            fb_mmu_vidmem_access_bit_buffer_size_enable_true_v()));
-    nvgpu_writel(g, fb_mmu_vidmem_access_bit_buffer_size_r(),
-        vab_buf_size_reg);
-
     /*
      * - Update NV_PFB_PRI_MMU_VIDMEM_ACCESS_BIT settings
      */
@@ -274,18 +263,15 @@ int ga10b_fb_vab_reserve(struct gk20a *g, u32 vab_mode, u32 num_range_checkers,
         goto fail;
     }
 
-    /* Enable VAB */
-    vab_reg = set_field(vab_reg, fb_mmu_vidmem_access_bit_enable_m(),
-        fb_mmu_vidmem_access_bit_enable_f(
-            fb_mmu_vidmem_access_bit_enable_true_v()));
     nvgpu_writel(g, fb_mmu_vidmem_access_bit_r(), vab_reg);
 
     /*
-     * Enable VAB in GPC
+     * Setup VAB in GPC
      */
-    g->ops.gr.vab_init(g, vab_reg, num_range_checkers, vab_range_checker);
+    g->ops.gr.vab_reserve(g, vab_reg, num_range_checkers, vab_range_checker);
+
+    /* Enable VAB */
+    ga10b_fb_vab_enable(g, true);
 
 fail:
     return err;
 }
@@ -310,7 +296,8 @@ static int ga10b_fb_vab_request_dump(struct gk20a *g)
     do {
         vab_dump_reg = nvgpu_readl(g, fb_mmu_vidmem_access_bit_dump_r());
         nvgpu_log(g, gpu_dbg_vab, "vab_dump_reg 0x%x", vab_dump_reg);
-        if (fb_mmu_vidmem_access_bit_dump_trigger_v(vab_dump_reg) == trigger_reset) {
+        if (fb_mmu_vidmem_access_bit_dump_trigger_v(vab_dump_reg) ==
+                trigger_reset) {
             return 0;
         }
         nvgpu_usleep_range(delay, delay * 2U);
@@ -319,15 +306,16 @@ static int ga10b_fb_vab_request_dump(struct gk20a *g)
     return -ETIMEDOUT;
 }
 
-static int ga10b_fb_vab_query_valid_bit(struct gk20a *g, struct nvgpu_mem *vab_buf,
-    u64 valid_offset_in_bytes, u32 *out_valid_wr)
+static int ga10b_fb_vab_query_valid_bit(struct gk20a *g,
+    struct nvgpu_mem *vab_buf, u64 valid_offset_in_bytes, u32 *out_valid_wr)
 {
     struct nvgpu_timeout timeout;
     u32 delay = POLL_DELAY_MIN_US;
     u32 valid_wr = 0;
 
     nvgpu_timeout_init_cpu_timer(g, &timeout, 1000U);
     do {
-        nvgpu_mem_rd_n(g, vab_buf, valid_offset_in_bytes, &valid_wr, sizeof(valid_wr));
+        nvgpu_mem_rd_n(g, vab_buf, valid_offset_in_bytes, &valid_wr,
+            sizeof(valid_wr));
         if ((valid_wr >> 31U) == 1U) {
             *out_valid_wr = valid_wr;
             return 0;
@@ -349,32 +337,43 @@ int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u8 *user_buf,
      */
     const u64 rd_wr_granularity_size = 4ULL;
     int err = 0;
-    struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
-    u64 i = 0U;
+    struct nvgpu_mem *vab_buf = &g->vab.buffer;
+    struct nvgpu_vab *vab = &g->vab;
     u32 vab_put_ptr = 0U;
     u32 vab_put_ptr_value = 0U;
     u32 valid_wr = 0U;
+    unsigned long i = 0UL;
+    unsigned long vab_num_packets = 0UL;
+    unsigned long vab_user_buf_min_size_bytes = 0UL;
     u64 valid_offset_in_bytes = 0ULL;
 
+    vab_num_packets = vab->entry_size / GA10B_VAB_PACKET_SIZE_IN_BYTES;
+    nvgpu_log(g, gpu_dbg_vab, "vab num_packets 0x%lx", vab_num_packets);
+
+    vab_user_buf_min_size_bytes =
+        vab_num_packets * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
+
     if ((user_buf_size % rd_wr_granularity_size) != 0UL) {
         /* Restriction comes from the rd_n/wr_n operations */
         nvgpu_err(g, "user_buf size must 4-bytes-aligned.");
         return -EINVAL;
     }
-    if (user_buf_size < GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES) {
-        nvgpu_err(g, "user_buf size must be at least %lu bytes. Given: %llu",
-            GA10B_VAB_ACCESS_BITS_TOTAL_SIZE_IN_BYTES, user_buf_size);
+    if (user_buf_size < vab_user_buf_min_size_bytes) {
+        nvgpu_err(g,
+            "user_buf size must be at least %lu bytes. Given: %llu",
+            vab_user_buf_min_size_bytes, user_buf_size);
         return -EINVAL;
     }
 
     /* Get buffer address */
     vab_put_ptr = nvgpu_readl(g, fb_mmu_vidmem_access_bit_buffer_put_r());
-    vab_put_ptr_value = fb_mmu_vidmem_access_bit_buffer_put_ptr_v(vab_put_ptr);
+    vab_put_ptr_value =
+        fb_mmu_vidmem_access_bit_buffer_put_ptr_v(vab_put_ptr);
     nvgpu_log(g, gpu_dbg_vab, "vab_put_ptr 0x%x", vab_put_ptr);
 
     if (vab_put_ptr_value != 0U) {
-        nvgpu_err(g, "unexpected vab_put_ptr value: %u", vab_put_ptr_value);
+        nvgpu_err(g, "unexpected vab_put_ptr value: %u",
+            vab_put_ptr_value);
         return -EINVAL;
     }
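
The two derived quantities above follow directly from the fixed packet
layout; a standalone sketch with the ga10b numbers (constants duplicated
here for illustration only):

/* Sketch of the dump sizing: 64B packets, 32B of access bits per packet. */
#include <stdio.h>

#define VAB_PACKET_SIZE_IN_BYTES              64UL
#define VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES  32UL

int main(void)
{
    unsigned long entry_size = 512UL; /* e.g. the old fixed ga10b entry */
    unsigned long num_packets = entry_size / VAB_PACKET_SIZE_IN_BYTES;
    unsigned long user_min =
        num_packets * VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;

    /* 512B entry -> 8 packets -> at least 256B of user buffer,
     * matching the old GA10B_VAB_NUM_PACKETS / total-size macros. */
    printf("packets %lu, min user buffer %lu bytes\n",
           num_packets, user_min);
    return 0;
}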
@@ -385,30 +384,38 @@ int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u8 *user_buf,
         goto done;
     }
 
-    for (i = 0U; i < GA10B_VAB_NUM_PACKETS; i++) {
-        /* The valid bit is the very top bit of this packet's 64 bytes */
+    for (i = 0U; i < vab_num_packets; i++) {
+        /*
+         * The valid bit is the very top bit of this packet's 64 bytes
+         */
         valid_offset_in_bytes =
-            (i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES - rd_wr_granularity_size;
+            (i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES -
+            rd_wr_granularity_size;
 
         /* Poll the bit to see if this packet's results are valid */
-        err = ga10b_fb_vab_query_valid_bit(g, vab_buf, valid_offset_in_bytes, &valid_wr);
+        err = ga10b_fb_vab_query_valid_bit(g, vab_buf,
+            valid_offset_in_bytes, &valid_wr);
         if (err == 0) {
             /*
-             * Read VAB bits. Each packet is 64 bytes, but only 32 are access bytes.
-             * User expects contiguous dump of access bits, so some extra calculations
+             * Read VAB bits. Each packet is 64 bytes, but only 32
+             * are access bytes. User expects contiguous dump of
+             * access bits, so some extra calculations
              * are necessary.
              */
-            const u64 num_bytes_to_copy = GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
+            const u64 num_bytes_to_copy =
+                GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
             /* Determine source buffer */
-            const u64 vab_offset = i * GA10B_VAB_PACKET_SIZE_IN_BYTES;
+            const u64 vab_offset =
+                i * GA10B_VAB_PACKET_SIZE_IN_BYTES;
             /* Determine destination va */
-            u8 *user_buf_destination = user_buf + i * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
-            nvgpu_mem_rd_n(g, vab_buf, vab_offset, user_buf_destination, num_bytes_to_copy);
+            u8 *user_buf_destination = user_buf +
+                i * GA10B_VAB_PACKET_ACCESS_BITS_SIZE_IN_BYTES;
+            nvgpu_mem_rd_n(g, vab_buf, vab_offset,
+                user_buf_destination, num_bytes_to_copy);
         } else {
-            nvgpu_err(g, "Reading packet's %llu failed", i);
+            nvgpu_err(g, "Reading packet's %lu failed", i);
             goto clear_valid_bits;
         }
     }
     err = 0;
 
 clear_valid_bits:
@@ -417,10 +424,12 @@ clear_valid_bits:
      * All bits need to be cleared even if querying failed for any of the bits.
      */
     valid_wr = 0;
-    for (i = 0U; i < GA10B_VAB_NUM_PACKETS; i++) {
+    for (i = 0U; i < vab_num_packets; i++) {
         valid_offset_in_bytes =
-            (i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES - rd_wr_granularity_size;
-        nvgpu_mem_wr_n(g, vab_buf, valid_offset_in_bytes, &valid_wr, sizeof(valid_wr));
+            (i + 1ULL) * GA10B_VAB_PACKET_SIZE_IN_BYTES -
+            rd_wr_granularity_size;
+        nvgpu_mem_wr_n(g, vab_buf,
+            valid_offset_in_bytes, &valid_wr, sizeof(valid_wr));
     }
 
 done:
@@ -431,7 +440,7 @@ int ga10b_fb_vab_release(struct gk20a *g)
 {
     nvgpu_log_fn(g, " ");
 
-    ga10b_fb_val_release_registers(g);
+    ga10b_fb_vab_enable(g, false);
 
     return 0;
 }
@@ -442,7 +451,7 @@ int ga10b_fb_vab_teardown(struct gk20a *g)
      * free vab buffer
      */
     struct vm_gk20a *vm = g->mm.bar2.vm;
-    struct nvgpu_mem *vab_buf = &g->mm.vab.buffer;
+    struct nvgpu_mem *vab_buf = &g->vab.buffer;
 
     if (nvgpu_mem_is_valid(vab_buf)) {
         nvgpu_dma_unmap_free(vm, vab_buf);

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -27,6 +27,7 @@ struct gk20a;
 struct nvgpu_vab_range_checker;
 
 int ga10b_fb_vab_init(struct gk20a *g);
+void ga10b_fb_vab_set_vab_buffer_address(struct gk20a *g, u64 buf_addr);
 int ga10b_fb_vab_reserve(struct gk20a *g, u32 vab_mode, u32 num_range_checkers,
     struct nvgpu_vab_range_checker *vab_range_checker);
 int ga10b_fb_vab_dump_and_clear(struct gk20a *g, u8 *user_buf,

View File

@@ -1,7 +1,7 @@
 /*
  * GA10b GPU GR
  *
- * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1064,7 +1064,7 @@ const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count)
 #endif /* CONFIG_NVGPU_DEBUGGER */
 
 #ifdef CONFIG_NVGPU_HAL_NON_FUSA
-void ga10b_gr_vab_init(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
+void ga10b_gr_vab_reserve(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
     struct nvgpu_vab_range_checker *vab_range_checker)
 {
     /*
@@ -1092,17 +1092,13 @@ void ga10b_gr_vab_init(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
             granularity_shift_bits));
     }
 
-    /* Setup VAB */
     nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), vab_reg);
 }
 
-void ga10b_gr_vab_release(struct gk20a *g, u32 vab_reg)
+void ga10b_gr_vab_configure(struct gk20a *g, u32 vab_reg)
 {
     nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), vab_reg);
 }
-
-void ga10b_gr_vab_recover(struct gk20a *g, u32 vab_reg)
-{
-    nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), 0);
-    nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), vab_reg);
-}
 #endif /* CONFIG_NVGPU_HAL_NON_FUSA */

View File

@@ -1,7 +1,7 @@
 /*
  * GA10B GPU GR
  *
- * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -41,10 +41,9 @@ int gr_ga10b_dump_gr_status_regs(struct gk20a *g,
 void gr_ga10b_set_circular_buffer_size(struct gk20a *g, u32 data);
 void ga10b_gr_set_gpcs_rops_crop_debug4(struct gk20a *g, u32 data);
 #ifdef CONFIG_NVGPU_HAL_NON_FUSA
-void ga10b_gr_vab_init(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
+void ga10b_gr_vab_reserve(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
     struct nvgpu_vab_range_checker *vab_range_checker);
-void ga10b_gr_vab_release(struct gk20a *g, u32 vab_reg);
-void ga10b_gr_vab_recover(struct gk20a *g, u32 vab_reg);
+void ga10b_gr_vab_configure(struct gk20a *g, u32 vab_reg);
 #endif /* CONFIG_NVGPU_HAL_NON_FUSA */
 #ifdef CONFIG_NVGPU_DEBUGGER

View File

@@ -778,9 +778,8 @@ static const struct gops_gr ga10b_ops_gr = {
     .gr_init_support = nvgpu_gr_init_support,
     .gr_suspend = nvgpu_gr_suspend,
 #ifdef CONFIG_NVGPU_HAL_NON_FUSA
-    .vab_init = ga10b_gr_vab_init,
-    .vab_recover = ga10b_gr_vab_recover,
-    .vab_release = ga10b_gr_vab_release,
+    .vab_reserve = ga10b_gr_vab_reserve,
+    .vab_configure = ga10b_gr_vab_configure,
 #endif
 #ifdef CONFIG_NVGPU_DEBUGGER
     .get_gr_status = gr_gm20b_get_gr_status,
@@ -878,6 +877,7 @@ static const struct gops_fb_intr ga10b_ops_fb_intr = {
 #ifdef CONFIG_NVGPU_HAL_NON_FUSA
 static const struct gops_fb_vab ga10b_ops_fb_vab = {
     .init = ga10b_fb_vab_init,
+    .set_vab_buffer_address = ga10b_fb_vab_set_vab_buffer_address,
     .reserve = ga10b_fb_vab_reserve,
     .dump_and_clear = ga10b_fb_vab_dump_and_clear,
     .release = ga10b_fb_vab_release,

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -464,9 +464,8 @@ static const struct gops_gr_intr vgpu_ga10b_ops_gr_intr = {
 static const struct gops_gr vgpu_ga10b_ops_gr = {
     .gr_init_support = nvgpu_gr_init_support,
     .gr_suspend = nvgpu_gr_suspend,
-    .vab_init = NULL,
-    .vab_release = NULL,
-    .vab_recover = NULL,
+    .vab_reserve = NULL,
+    .vab_configure = NULL,
 #ifdef CONFIG_NVGPU_DEBUGGER
     .set_alpha_circular_buffer_size = NULL,
     .set_circular_buffer_size = NULL,
@@ -1058,6 +1057,7 @@ static const struct gops_grmgr vgpu_ga10b_ops_grmgr = {
 static const struct gops_fb_vab vgpu_ga10b_ops_fb_vab = {
     .init = NULL,
+    .set_vab_buffer_address = NULL,
     .reserve = vgpu_fb_vab_reserve,
     .dump_and_clear = vgpu_fb_vab_dump_and_clear,
     .release = vgpu_fb_vab_release,
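
The vGPU table above leaves the new op NULL, so any common-code caller that
can run on vGPU would have to skip it. A standalone sketch of that ops-table
pattern follows; the types and the guard are simplified stand-ins, not the
real nvgpu structures, and only an assumption about how a caller might treat
a missing op.

/* Stand-in types; illustrative only. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct fake_gpu;

struct fake_vab_ops {
    void (*set_vab_buffer_address)(struct fake_gpu *g, uint64_t addr);
};

struct fake_gpu {
    struct fake_vab_ops vab;
};

static void native_set_addr(struct fake_gpu *g, uint64_t addr)
{
    (void)g;
    printf("program VAB buffer address 0x%llx\n",
           (unsigned long long)addr);
}

static void set_addr_if_supported(struct fake_gpu *g, uint64_t addr)
{
    if (g->vab.set_vab_buffer_address != NULL) {
        g->vab.set_vab_buffer_address(g, addr);
    }
}

int main(void)
{
    struct fake_gpu native = {
        .vab = { .set_vab_buffer_address = native_set_addr },
    };
    struct fake_gpu vgpu = {
        .vab = { .set_vab_buffer_address = NULL },
    };

    set_addr_if_supported(&native, 0x1234567000ULL); /* programs "HW" */
    set_addr_if_supported(&vgpu, 0x1234567000ULL);   /* skipped on vGPU */
    return 0;
}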

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -57,6 +57,8 @@ struct nvgpu_vab_range_checker {
 struct nvgpu_vab {
     u32 user_num_range_checkers;
+    u32 num_entries;
+    unsigned long entry_size;
     struct nvgpu_mem buffer;
 };

View File

@@ -727,6 +727,10 @@ struct gk20a {
 #ifdef CONFIG_NVGPU_NON_FUSA
     u32 tpc_fs_mask_user;
     u32 fecs_feature_override_ecc_val;
+
+    /** VAB struct */
+    struct nvgpu_vab vab;
 #endif
 
 #ifdef CONFIG_NVGPU_STATIC_POWERGATE

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -171,6 +171,12 @@ struct gops_fb_vab {
      */
     int (*init)(struct gk20a *g);
 
+    /**
+     * @brief Set VAB buffer address in HW registers
+     *
+     */
+    void (*set_vab_buffer_address)(struct gk20a *g, u64 buf_addr);
+
     /**
      * @brief Initialize VAB range checkers and enable VAB tracking
      *

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1365,10 +1365,9 @@ struct gops_gr {
     struct gops_gr_zcull zcull;
 #endif /* CONFIG_NVGPU_GRAPHICS */
 #if defined(CONFIG_NVGPU_HAL_NON_FUSA)
-    void (*vab_init)(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
+    void (*vab_reserve)(struct gk20a *g, u32 vab_reg, u32 num_range_checkers,
         struct nvgpu_vab_range_checker *vab_range_checker);
-    void (*vab_recover)(struct gk20a *g, u32 vab_reg);
-    void (*vab_release)(struct gk20a *g, u32 vab_reg);
+    void (*vab_configure)(struct gk20a *g, u32 vab_reg);
 #endif
     /** @endcond */
 };

View File

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -457,11 +457,6 @@ struct mm_gk20a {
     struct nvgpu_mem mmu_wr_mem;
     /** GMMU debug read buffer. */
     struct nvgpu_mem mmu_rd_mem;
-
-#if defined(CONFIG_NVGPU_NON_FUSA)
-    /** VAB struct */
-    struct nvgpu_vab vab;
-#endif
 };
 
 /**