gpu: nvgpu: ga10b: Use active ltcs count for cbc init

This patch fixes a bug in the ga10b CBC initialization code, which
for historical reasons assumed a fixed LTC count of one. For Volta
and later chips, the full active LTC count must be used in
CBC-related computations.
Ensure:
- CBC base address is 64KB aligned
- CBC start address lies within the CBC allocated memory

Check that CBC is marked safe by the MMU only on silicon platforms.
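In code terms, the fix replaces the hard-coded divisor with one scaled by the active LTC count. A minimal sketch, not driver source, assuming the alignment shift of 11 (2KB granularity) from the fb_mmu_cbc_*_alignment_shift_v() defines visible at the end of this diff:

    #include <stdint.h>

    /* Hedged sketch: the CBC base divisor now scales with the active
     * LTC count instead of a hard-coded count of one. */
    static uint64_t cbc_base_divisor(uint32_t active_ltc_count)
    {
        return (uint64_t)active_ltc_count << 11U; /* alignment shift */
    }
    /* old ga10b behavior: cbc_base_divisor(1) == 0x800  (2KB)
     * fixed behavior:     cbc_base_divisor(2) == 0x1000 (4KB) with 2 LTCs */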

Bug 3353418

Change-Id: I5edee2a78dc9e8c149e111a9f088a57e0154f5c2
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2585778
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Vedashree Vidwans
2021-08-25 21:58:21 -07:00
committed by mobile promotions
parent cc7b048641
commit a3e2283cf2
13 changed files with 113 additions and 67 deletions

View File

@@ -33,17 +33,6 @@
#include <nvgpu/hw/ga10b/hw_ltc_ga10b.h>
u64 ga10b_cbc_get_base_divisor(struct gk20a *g)
{
/*
* For Tegra, the addressing works differently. Unlike DGPU, all
* partitions talk to the same memory.
*/
u64 ltc_count = 1ULL;
return ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
}
int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
{
/*
@@ -83,11 +72,7 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(
nvgpu_readl(g, ltc_ltcs_ltss_cbc_param_r()));
/*
* For Tegra, the addressing works differently. Unlike DGPU, all
* partitions talk to the same memory.
*/
u32 ltc_count = 1U;
u64 base_divisor = 0ULL;
/* check if vidmem is present */
bool alloc_vidmem = g->ops.fb.get_vidmem_size != NULL ? true : false;
@@ -111,7 +96,8 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
/* Memory required for comptag lines in all slices of all ltcs */
compbit_backing_size = nvgpu_safe_mult_u32(
nvgpu_safe_mult_u32(max_comptag_lines,
nvgpu_ltc_get_slices_per_ltc(g)), ltc_count);
nvgpu_ltc_get_slices_per_ltc(g)),
nvgpu_ltc_get_ltc_count(g));
/* Total memory required for compstatus */
compbit_backing_size = nvgpu_safe_mult_u32(
@@ -119,13 +105,41 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
gobs_per_comptagline_per_slice), compstatus_per_gob);
/* aligned to 2KB * ltc_count */
compbit_backing_size += nvgpu_safe_cast_u64_to_u32(
g->ops.cbc.get_base_divisor(g));
g->ops.fb.cbc_get_alignment(g, &base_divisor, NULL);
compbit_backing_size = nvgpu_safe_cast_u64_to_u32(
nvgpu_safe_add_u64(compbit_backing_size, base_divisor));
/* must be a multiple of 64KB */
compbit_backing_size = round_up(compbit_backing_size, SZ_64K);
err = nvgpu_cbc_alloc(g, compbit_backing_size, alloc_vidmem);
/*
* Address calculation for CBC applies swizzle to the lower 16 bits
* of physical address. So, CBC start and end address should be 64KB
* aligned.
* Memory allocated is aligned corresponding to PAGE_SIZE and can be
* seen as:
*
* ------------------------ Allocated physical memory end address
* ^ -------------- 64KB aligned CBC end address
* | ^
* | allocated |
* | physical |
* | address | CBC occupied
* | space | address space
* | |
* | v
* v -------------- 64KB aligned CBC start address
* ------------------------ Allocated physical memory start address
*
* With PAGE_SIZE other than 64KB, the physical memory start address
* may not be 64KB aligned. So, choose CBC start address to be the
* lower 64KB multiple within the allocated memory.
* However, offsetting start address will put the CBC memory beyond
* the allocated space. Hence, request for 64KB additional memory to
* incorporate the offset.
*/
err = nvgpu_cbc_alloc(g, (compbit_backing_size + SZ_64K), alloc_vidmem);
if (err != 0) {
return err;
}
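The over-allocation logic described in the comment above can be checked with a small standalone sketch (plain C with assumed kernel-style round_up and SZ_64K definitions; not the driver source):

    #include <stdint.h>
    #include <assert.h>

    #define SZ_64K         0x10000ULL
    /* kernel-style round_up for a power-of-two alignment */
    #define round_up(x, a) (((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

    /* pa: allocation start, possibly only PAGE_SIZE aligned;
     * size: CBC backing size, already a 64KB multiple.
     * Because the allocation is size + 64KB, the rounded-up CBC
     * start plus the full CBC size always fits inside it. */
    static uint64_t cbc_start(uint64_t pa, uint64_t size)
    {
        uint64_t start = round_up(pa, SZ_64K);
        assert(start >= pa);                        /* start within alloc */
        assert(start + size <= pa + size + SZ_64K); /* end within alloc   */
        return start;
    }

    int main(void)
    {
        /* 4KB PAGE_SIZE-aligned but not 64KB-aligned start address */
        assert(cbc_start(0x80004000ULL, 0x20000ULL) == 0x80010000ULL);
        return 0;
    }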
@@ -144,8 +158,6 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte, "supported LTCs: 0x%x",
nvgpu_ltc_get_ltc_count(g));
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,
"ltc_count used for calculations: 0x%x", ltc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,
"compbit backing store size : 0x%x", compbit_backing_size);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,

View File

@@ -30,7 +30,6 @@
struct gk20a;
struct nvgpu_cbc;
u64 ga10b_cbc_get_base_divisor(struct gk20a *g);
int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc);
void ga10b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc);

View File

@@ -1,7 +1,7 @@
/*
* TU104 CBC
*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -34,13 +34,6 @@
#include "cbc_tu104.h"
u64 tu104_cbc_get_base_divisor(struct gk20a *g)
{
return (u64)nvgpu_ltc_get_ltc_count(g) <<
ltc_ltcs_ltss_cbc_base_alignment_shift_v();
}
int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
{
/* max memory size (MB) to cover */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,7 +31,6 @@ enum nvgpu_cbc_op;
struct gk20a;
struct nvgpu_cbc;
u64 tu104_cbc_get_base_divisor(struct gk20a *g);
int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc);
int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op,
u32 min, u32 max);

View File

@@ -38,21 +38,23 @@
#ifdef CONFIG_NVGPU_COMPRESSION
void ga10b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
{
u64 compbit_store_base;
u64 compbit_store_pa;
u64 combit_top_size;
u64 combit_top;
u32 cbc_max_rval;
/* Unlike dgpu, partition swizzling is disabled for ga10b */
u32 num_swizzled_ltcs = 1U;
u64 base_divisor = 0ULL;
u64 top_divisor = 0ULL;
u64 compbit_store_base = 0ULL;
u64 compbit_start_pa = 0ULL;
u64 compbit_store_pa = 0ULL;
u64 combit_top_size = 0ULL;
u64 combit_top = 0ULL;
u32 cbc_max_rval = 0U;
g->ops.fb.cbc_get_alignment(g, &base_divisor, &top_divisor);
/*
* Update CBC registers
* Note: CBC Base value should be updated after CBC MAX
*/
combit_top_size = cbc->compbit_backing_size;
combit_top = (combit_top_size / num_swizzled_ltcs) >>
fb_mmu_cbc_top_alignment_shift_v();
combit_top = combit_top_size / top_divisor;
nvgpu_assert(combit_top < U64(U32_MAX));
nvgpu_writel(g, fb_mmu_cbc_top_r(),
fb_mmu_cbc_top_size_f(u64_lo32(combit_top)));
@@ -64,31 +66,44 @@ void ga10b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max_rval);
compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
compbit_store_base = (compbit_store_pa / num_swizzled_ltcs) >>
fb_mmu_cbc_base_alignment_shift_v();
/* must be a multiple of 64KB within allocated memory */
compbit_store_base = round_up(compbit_store_pa, SZ_64K);
/* Calculate post-divide cbc address */
compbit_store_base = compbit_store_base / base_divisor;
/*
* CBC start address is calculated from the CBC_BASE register value
* Check that CBC start address lies within cbc allocated memory.
*/
compbit_start_pa = compbit_store_base * base_divisor;
nvgpu_assert(compbit_start_pa >= compbit_store_pa);
nvgpu_assert(compbit_store_base < U64(U32_MAX));
nvgpu_writel(g, fb_mmu_cbc_base_r(),
fb_mmu_cbc_base_address_f(u64_lo32(compbit_store_base)));
if (nvgpu_platform_is_silicon(g)) {
/* Make sure cbc is marked safe by MMU */
cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
if ((cbc_max_rval & fb_mmu_cbc_max_safe_m()) !=
fb_mmu_cbc_max_safe_true_f()) {
nvgpu_err(g,
"CBC marked unsafe by MMU, check cbc config");
}
}
cbc->compbit_store.base_hw = compbit_store_base;
nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
"compbit top size: 0x%x,%08x \n",
(u32)(combit_top_size >> 32),
(u32)(combit_top_size & 0xffffffffU));
nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
"compbit base.pa: 0x%x,%08x cbc_base:0x%llx\n",
"compbit mem.pa: 0x%x,%08x cbc_base:0x%llx\n",
(u32)(compbit_store_pa >> 32),
(u32)(compbit_store_pa & 0xffffffffU),
compbit_store_base);
/* Make sure cbc is marked safe by MMU */
cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
if ((cbc_max_rval & fb_mmu_cbc_max_safe_m()) !=
fb_mmu_cbc_max_safe_true_f()) {
nvgpu_err(g, "CBC marked unsafe by MMU, check cbc config");
}
cbc->compbit_store.base_hw = compbit_store_base;
}
#endif
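Continuing the numbers from the sketch after the first file (aligned CBC start 0x80010000, and assuming 2 active LTCs so base_divisor = 0x1000), the base register programming above works out to:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        uint64_t pa      = 0x80004000ULL;       /* compbit_store_pa        */
        uint64_t aligned = 0x80010000ULL;       /* round_up(pa, SZ_64K)    */
        uint64_t base    = aligned / 0x1000ULL; /* value for CBC_BASE      */
        uint64_t start   = base * 0x1000ULL;    /* recomputed CBC start    */
        assert(base == 0x80010ULL);
        assert(start >= pa);            /* start lies within the alloc,
                                         * so the nvgpu_assert above holds */
        return 0;
    }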

View File

@@ -104,9 +104,26 @@ int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
}
#ifdef CONFIG_NVGPU_COMPRESSION
void tu104_fb_cbc_get_alignment(struct gk20a *g,
u64 *base_divisor, u64 *top_divisor)
{
u64 ltc_count = (u64)nvgpu_ltc_get_ltc_count(g);
if (base_divisor != NULL) {
*base_divisor =
ltc_count << fb_mmu_cbc_base_alignment_shift_v();
}
if (top_divisor != NULL) {
*top_divisor =
ltc_count << fb_mmu_cbc_top_alignment_shift_v();
}
}
void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
{
u64 base_divisor;
u64 top_divisor;
u64 compbit_store_base;
u64 compbit_store_pa;
u64 cbc_start_addr, cbc_end_addr;
@@ -114,17 +131,14 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
u64 cbc_top_size;
u32 cbc_max;
g->ops.fb.cbc_get_alignment(g, &base_divisor, &top_divisor);
compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
base_divisor = g->ops.cbc.get_base_divisor(g);
compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor);
cbc_start_addr = (u64)nvgpu_ltc_get_ltc_count(g) *
(compbit_store_base <<
fb_mmu_cbc_base_address_alignment_shift_v());
cbc_start_addr = compbit_store_base * base_divisor;
cbc_end_addr = cbc_start_addr + cbc->compbit_backing_size;
cbc_top = (cbc_end_addr / nvgpu_ltc_get_ltc_count(g)) >>
fb_mmu_cbc_base_address_alignment_shift_v();
cbc_top = (cbc_end_addr / top_divisor);
cbc_top_size = u64_lo32(cbc_top) - compbit_store_base;
nvgpu_assert(cbc_top_size < U64(U32_MAX));
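A hedged numeric walk-through of the tu104 path above, with assumed values (2 active LTCs, so base_divisor == top_divisor == 0x1000; a 0x20000-byte backing store; a PA deliberately not divisor-aligned):

    #include <stdint.h>
    #include <assert.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        uint64_t pa    = 0x80004800ULL;
        uint64_t base  = DIV_ROUND_UP(pa, 0x1000ULL);  /* 0x80005     */
        uint64_t start = base * 0x1000ULL;             /* 0x80005000  */
        uint64_t end   = start + 0x20000ULL;           /* 0x80025000  */
        uint64_t top   = end / 0x1000ULL;              /* 0x80025     */
        assert(start >= pa);             /* CBC start within the alloc */
        assert(top - base == 0x20ULL);   /* cbc_top_size               */
        return 0;
    }

As the NULL checks in tu104_fb_cbc_get_alignment() show, a caller that needs only one divisor may pass NULL for the other, as ga10b_cbc_alloc_comptags() does for top_divisor.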

View File

@@ -31,6 +31,9 @@ struct nvgpu_mem;
int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb);
#ifdef CONFIG_NVGPU_COMPRESSION
struct nvgpu_cbc;
void tu104_fb_cbc_get_alignment(struct gk20a *g,
u64 *base_divisor, u64 *top_divisor);
void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc);
#endif
int tu104_fb_apply_pdb_cache_errata(struct gk20a *g);

View File

@@ -403,7 +403,6 @@ static const struct gops_cbc ga100_ops_cbc = {
.cbc_init_support = nvgpu_cbc_init_support,
.cbc_remove_support = nvgpu_cbc_remove_support,
.init = tu104_cbc_init,
.get_base_divisor = tu104_cbc_get_base_divisor,
.alloc_comptags = ga100_cbc_alloc_comptags,
.ctrl = tu104_cbc_ctrl,
.fix_config = NULL,
@@ -882,6 +881,7 @@ static const struct gops_fb ga100_ops_fb = {
#ifdef CONFIG_NVGPU_COMPRESSION
.is_comptagline_mode_enabled = ga100_fb_is_comptagline_mode_enabled,
.cbc_configure = tu104_fb_cbc_configure,
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
.set_use_full_comp_tag_line = gm20b_fb_set_use_full_comp_tag_line,
.compression_page_size = ga100_fb_compression_page_size,
.compressible_page_size = gp10b_fb_compressible_page_size,

View File

@@ -366,7 +366,6 @@ static const struct gops_cbc ga10b_ops_cbc = {
.cbc_init_support = nvgpu_cbc_init_support,
.cbc_remove_support = nvgpu_cbc_remove_support,
.init = ga10b_cbc_init,
.get_base_divisor = ga10b_cbc_get_base_divisor,
.alloc_comptags = ga10b_cbc_alloc_comptags,
.ctrl = tu104_cbc_ctrl,
};
@@ -871,6 +870,7 @@ static const struct gops_fb ga10b_ops_fb = {
.mmu_debug_rd = gm20b_fb_mmu_debug_rd,
#ifdef CONFIG_NVGPU_COMPRESSION
.cbc_configure = ga10b_fb_cbc_configure,
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
.set_use_full_comp_tag_line = NULL,
.compression_page_size = gp10b_fb_compression_page_size,
.compressible_page_size = gp10b_fb_compressible_page_size,

View File

@@ -341,7 +341,6 @@ static const struct gops_cbc tu104_ops_cbc = {
.cbc_init_support = nvgpu_cbc_init_support,
.cbc_remove_support = nvgpu_cbc_remove_support,
.init = tu104_cbc_init,
.get_base_divisor = tu104_cbc_get_base_divisor,
.alloc_comptags = tu104_cbc_alloc_comptags,
.ctrl = tu104_cbc_ctrl,
.fix_config = NULL,
@@ -797,6 +796,7 @@ static const struct gops_fb tu104_ops_fb = {
.mmu_debug_rd = gm20b_fb_mmu_debug_rd,
#ifdef CONFIG_NVGPU_COMPRESSION
.cbc_configure = tu104_fb_cbc_configure,
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
.set_use_full_comp_tag_line = gm20b_fb_set_use_full_comp_tag_line,
.compression_page_size = gp10b_fb_compression_page_size,
.compressible_page_size = gp10b_fb_compressible_page_size,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,7 +27,6 @@ struct gops_cbc {
int (*cbc_init_support)(struct gk20a *g);
void (*cbc_remove_support)(struct gk20a *g);
void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc);
u64 (*get_base_divisor)(struct gk20a *g);
int (*alloc_comptags)(struct gk20a *g,
struct nvgpu_cbc *cbc);
int (*ctrl)(struct gk20a *g, enum nvgpu_cbc_op op,

View File

@@ -440,6 +440,18 @@ struct gops_fb {
(struct gk20a *g);
#ifdef CONFIG_NVGPU_COMPRESSION
void (*cbc_configure)(struct gk20a *g, struct nvgpu_cbc *cbc);
/**
* @brief Get cbc base and top alignment specs.
*
* @param g [in] Pointer to GPU driver struct.
* @param base_divisor [out] Receives the CBC base alignment divisor;
*        skipped if NULL.
* @param top_divisor [out] Receives the CBC top alignment divisor;
*        skipped if NULL.
*
* The function calculates and returns required CBC base and top
* alignment values.
*
* @return None.
*/
void (*cbc_get_alignment)(struct gk20a *g,
u64 *base_divisor, u64 *top_divisor);
bool (*set_use_full_comp_tag_line)(struct gk20a *g);
/*

View File

@@ -687,13 +687,13 @@
#define fb_mmu_num_active_ltcs_count_f(v) ((U32(v) & 0x1fU) << 0U)
#define fb_mmu_num_active_ltcs_count_v(r) (((r) >> 0U) & 0x1fU)
#define fb_mmu_cbc_base_r() (0x00100ec4U)
#define fb_mmu_cbc_base_alignment_shift_v() (0x0000000bU)
#define fb_mmu_cbc_base_address_f(v) ((U32(v) & 0x3ffffffU) << 0U)
#define fb_mmu_cbc_base_address_v(r) (((r) >> 0U) & 0x3ffffffU)
#define fb_mmu_cbc_base_address_alignment_shift_v() (0x0000000bU)
#define fb_mmu_cbc_top_r() (0x00100ec8U)
#define fb_mmu_cbc_top_alignment_shift_v() (0x0000000bU)
#define fb_mmu_cbc_top_size_f(v) ((U32(v) & 0x7fffU) << 0U)
#define fb_mmu_cbc_top_size_v(r) (((r) >> 0U) & 0x7fffU)
#define fb_mmu_cbc_top_size_alignment_shift_v() (0x0000000bU)
#define fb_mmu_cbc_max_r() (0x00100eccU)
#define fb_mmu_cbc_max_comptagline_f(v) ((U32(v) & 0xffffffU) << 0U)
#define fb_mmu_cbc_max_comptagline_m() (U32(0xffffffU) << 0U)
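For readers unfamiliar with nvgpu's generated hw headers, a short sketch of how these accessors compose (illustrative values only, assuming the header above is included):

    /* _r() yields the register offset, _f(v) encodes a field value,
     * _v(r) decodes it, and _m() yields the field mask. */
    u32 reg  = fb_mmu_cbc_base_address_f(0x80010U); /* encode 26-bit field */
    u32 base = fb_mmu_cbc_base_address_v(reg);      /* decode -> 0x80010   */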