mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: ga10b: Use active ltcs count for cbc init
This patch fixes a bug in the cbc initialization code for ga10b, where it was erroneously assumed, for historical reasons, that a fixed ltc count of only one should be used. For volta and later, the full ltc count should be used in cbc-related computation.

Ensure that:
- CBC base address is 64K aligned
- CBC start address lies within CBC allocated memory

Only on silicon platforms, check that CBC is marked safe.

Bug 3353418
Change-Id: I5edee2a78dc9e8c149e111a9f088a57e0154f5c2
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2585778
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
cc7b048641
commit
a3e2283cf2
@@ -33,17 +33,6 @@
|
||||
|
||||
#include <nvgpu/hw/ga10b/hw_ltc_ga10b.h>
|
||||
|
||||
/*
 * Return the CBC base divisor for ga10b.
 *
 * The value is a fixed LTC count of one shifted left by
 * ltc_ltcs_ltss_cbc_base_alignment_shift_v(); the parameter g is not read.
 *
 * NOTE(review): the commit message accompanying this hunk describes the
 * fixed count of one as erroneous and says the full ltc count should be
 * used instead - confirm before relying on this function.
 */
u64 ga10b_cbc_get_base_divisor(struct gk20a *g)
|
||||
{
|
||||
/*
|
||||
* For Tegra, the addressing works differently. Unlike DGPU, all
|
||||
* partitions talk to the same memory.
|
||||
*/
|
||||
u64 ltc_count = 1ULL;
|
||||
|
||||
return ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v();
|
||||
}
|
||||
|
||||
int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
{
|
||||
/*
|
||||
@@ -83,11 +72,7 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(
|
||||
nvgpu_readl(g, ltc_ltcs_ltss_cbc_param_r()));
|
||||
|
||||
/*
|
||||
* For Tegra, the addressing works differently. Unlike DGPU, all
|
||||
* partitions talk to the same memory.
|
||||
*/
|
||||
u32 ltc_count = 1U;
|
||||
u64 base_divisor = 0ULL;
|
||||
|
||||
/* check if vidmem is present */
|
||||
bool alloc_vidmem = g->ops.fb.get_vidmem_size != NULL ? true : false;
|
||||
@@ -111,7 +96,8 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
/* Memory required for comptag lines in all slices of all ltcs */
|
||||
compbit_backing_size = nvgpu_safe_mult_u32(
|
||||
nvgpu_safe_mult_u32(max_comptag_lines,
|
||||
nvgpu_ltc_get_slices_per_ltc(g)), ltc_count);
|
||||
nvgpu_ltc_get_slices_per_ltc(g)),
|
||||
nvgpu_ltc_get_ltc_count(g));
|
||||
|
||||
/* Total memory required for compstatus */
|
||||
compbit_backing_size = nvgpu_safe_mult_u32(
|
||||
@@ -119,13 +105,41 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
gobs_per_comptagline_per_slice), compstatus_per_gob);
|
||||
|
||||
/* aligned to 2KB * ltc_count */
|
||||
compbit_backing_size += nvgpu_safe_cast_u64_to_u32(
|
||||
g->ops.cbc.get_base_divisor(g));
|
||||
g->ops.fb.cbc_get_alignment(g, &base_divisor, NULL);
|
||||
compbit_backing_size = nvgpu_safe_cast_u64_to_u32(
|
||||
nvgpu_safe_add_u64(compbit_backing_size, base_divisor));
|
||||
|
||||
/* must be a multiple of 64KB */
|
||||
compbit_backing_size = round_up(compbit_backing_size, SZ_64K);
|
||||
|
||||
err = nvgpu_cbc_alloc(g, compbit_backing_size, alloc_vidmem);
|
||||
/*
|
||||
* Address calculation for CBC applies swizzle to the lower 16 bits
|
||||
* of physical address. So, CBC start and end address should be 64KB
|
||||
* aligned.
|
||||
* Memory allocated is aligned corresponding to PAGE_SIZE and can be
|
||||
* seen as:
|
||||
*
|
||||
* ------------------------ Allocated physical memory end address
|
||||
* ^ -------------- 64KB aligned CBC end address
|
||||
* | ^
|
||||
* | allocated |
|
||||
* | physical |
|
||||
* | address | CBC occupied
|
||||
* | space | address space
|
||||
* | |
|
||||
* | v
|
||||
* v -------------- 64KB aligned CBC start address
|
||||
* ------------------------ Allocated physical memory start address
|
||||
*
|
||||
* With PAGE_SIZE other than 64KB, the physical memory start address
|
||||
* may not be 64KB aligned. So, choose CBC start address to be the
|
||||
* lower 64KB multiple within the allocated memory.
|
||||
* However, offsetting start address will put the CBC memory beyond
|
||||
* the allocated space. Hence, request for 64KB additional memory to
|
||||
* incorporate the offset.
|
||||
*/
|
||||
|
||||
err = nvgpu_cbc_alloc(g, (compbit_backing_size + SZ_64K), alloc_vidmem);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
@@ -144,8 +158,6 @@ int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte, "supported LTCs: 0x%x",
|
||||
nvgpu_ltc_get_ltc_count(g));
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,
|
||||
"ltc_count used for calculations: 0x%x", ltc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,
|
||||
"compbit backing store size : 0x%x", compbit_backing_size);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_pte,
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
struct gk20a;
|
||||
struct nvgpu_cbc;
|
||||
|
||||
u64 ga10b_cbc_get_base_divisor(struct gk20a *g);
|
||||
int ga10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
void ga10b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* TU104 CBC
|
||||
*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -34,13 +34,6 @@
|
||||
|
||||
#include "cbc_tu104.h"
|
||||
|
||||
|
||||
/*
 * Return the CBC base divisor for tu104.
 *
 * The value is nvgpu_ltc_get_ltc_count(g), widened to u64 before the shift,
 * shifted left by ltc_ltcs_ltss_cbc_base_alignment_shift_v().
 */
u64 tu104_cbc_get_base_divisor(struct gk20a *g)
|
||||
{
|
||||
return (u64)nvgpu_ltc_get_ltc_count(g) <<
|
||||
ltc_ltcs_ltss_cbc_base_alignment_shift_v();
|
||||
}
|
||||
|
||||
int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
{
|
||||
/* max memory size (MB) to cover */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -31,7 +31,6 @@ enum nvgpu_cbc_op;
|
||||
struct gk20a;
|
||||
struct nvgpu_cbc;
|
||||
|
||||
u64 tu104_cbc_get_base_divisor(struct gk20a *g);
|
||||
int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op,
|
||||
u32 min, u32 max);
|
||||
|
||||
@@ -38,21 +38,23 @@
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
void ga10b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
{
|
||||
u64 compbit_store_base;
|
||||
u64 compbit_store_pa;
|
||||
u64 combit_top_size;
|
||||
u64 combit_top;
|
||||
u32 cbc_max_rval;
|
||||
/* Unlike dgpu, partition swizzling is disabled for ga10b */
|
||||
u32 num_swizzled_ltcs = 1U;
|
||||
u64 base_divisor = 0ULL;
|
||||
u64 top_divisor = 0ULL;
|
||||
u64 compbit_store_base = 0ULL;
|
||||
u64 compbit_start_pa = 0ULL;
|
||||
u64 compbit_store_pa = 0ULL;
|
||||
u64 combit_top_size = 0ULL;
|
||||
u64 combit_top = 0ULL;
|
||||
u32 cbc_max_rval = 0U;
|
||||
|
||||
g->ops.fb.cbc_get_alignment(g, &base_divisor, &top_divisor);
|
||||
|
||||
/*
|
||||
* Update CBC registers
|
||||
* Note: CBC Base value should be updated after CBC MAX
|
||||
*/
|
||||
combit_top_size = cbc->compbit_backing_size;
|
||||
combit_top = (combit_top_size / num_swizzled_ltcs) >>
|
||||
fb_mmu_cbc_top_alignment_shift_v();
|
||||
combit_top = combit_top_size / top_divisor;
|
||||
nvgpu_assert(combit_top < U64(U32_MAX));
|
||||
nvgpu_writel(g, fb_mmu_cbc_top_r(),
|
||||
fb_mmu_cbc_top_size_f(u64_lo32(combit_top)));
|
||||
@@ -64,31 +66,44 @@ void ga10b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max_rval);
|
||||
|
||||
compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
|
||||
compbit_store_base = (compbit_store_pa / num_swizzled_ltcs) >>
|
||||
fb_mmu_cbc_base_alignment_shift_v();
|
||||
/* must be a multiple of 64KB within allocated memory */
|
||||
compbit_store_base = round_up(compbit_store_pa, SZ_64K);
|
||||
/* Calculate post-divide cbc address */
|
||||
compbit_store_base = compbit_store_base / base_divisor;
|
||||
|
||||
/*
|
||||
* CBC start address is calculated from the CBC_BASE register value
|
||||
* Check that CBC start address lies within cbc allocated memory.
|
||||
*/
|
||||
compbit_start_pa = compbit_store_base * base_divisor;
|
||||
nvgpu_assert(compbit_start_pa >= compbit_store_pa);
|
||||
|
||||
nvgpu_assert(compbit_store_base < U64(U32_MAX));
|
||||
nvgpu_writel(g, fb_mmu_cbc_base_r(),
|
||||
fb_mmu_cbc_base_address_f(u64_lo32(compbit_store_base)));
|
||||
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
/* Make sure cbc is marked safe by MMU */
|
||||
cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
|
||||
if ((cbc_max_rval & fb_mmu_cbc_max_safe_m()) !=
|
||||
fb_mmu_cbc_max_safe_true_f()) {
|
||||
nvgpu_err(g,
|
||||
"CBC marked unsafe by MMU, check cbc config");
|
||||
}
|
||||
}
|
||||
|
||||
cbc->compbit_store.base_hw = compbit_store_base;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
|
||||
"compbit top size: 0x%x,%08x \n",
|
||||
(u32)(combit_top_size >> 32),
|
||||
(u32)(combit_top_size & 0xffffffffU));
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte,
|
||||
"compbit base.pa: 0x%x,%08x cbc_base:0x%llx\n",
|
||||
"compbit mem.pa: 0x%x,%08x cbc_base:0x%llx\n",
|
||||
(u32)(compbit_store_pa >> 32),
|
||||
(u32)(compbit_store_pa & 0xffffffffU),
|
||||
compbit_store_base);
|
||||
|
||||
/* Make sure cbc is marked safe by MMU */
|
||||
cbc_max_rval = nvgpu_readl(g, fb_mmu_cbc_max_r());
|
||||
if ((cbc_max_rval & fb_mmu_cbc_max_safe_m()) !=
|
||||
fb_mmu_cbc_max_safe_true_f()) {
|
||||
nvgpu_err(g, "CBC marked unsafe by MMU, check cbc config");
|
||||
}
|
||||
|
||||
cbc->compbit_store.base_hw = compbit_store_base;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -104,9 +104,26 @@ int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
/*
 * Report the divisors used for the CBC base and CBC top values.
 *
 * Both divisors are derived from nvgpu_ltc_get_ltc_count(g), widened to u64:
 *   *base_divisor = ltc_count << fb_mmu_cbc_base_alignment_shift_v()
 *   *top_divisor  = ltc_count << fb_mmu_cbc_top_alignment_shift_v()
 *
 * Either output pointer may be NULL; a NULL output is simply not written,
 * so a caller that needs only one value can pass NULL for the other.
 * The function returns nothing.
 */
void tu104_fb_cbc_get_alignment(struct gk20a *g,
|
||||
u64 *base_divisor, u64 *top_divisor)
|
||||
{
|
||||
u64 ltc_count = (u64)nvgpu_ltc_get_ltc_count(g);
|
||||
|
||||
if (base_divisor != NULL) {
|
||||
*base_divisor =
|
||||
ltc_count << fb_mmu_cbc_base_alignment_shift_v();
|
||||
}
|
||||
|
||||
if (top_divisor != NULL) {
|
||||
*top_divisor =
|
||||
ltc_count << fb_mmu_cbc_top_alignment_shift_v();
|
||||
}
|
||||
}
|
||||
|
||||
void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
{
|
||||
u64 base_divisor;
|
||||
u64 top_divisor;
|
||||
u64 compbit_store_base;
|
||||
u64 compbit_store_pa;
|
||||
u64 cbc_start_addr, cbc_end_addr;
|
||||
@@ -114,17 +131,14 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc)
|
||||
u64 cbc_top_size;
|
||||
u32 cbc_max;
|
||||
|
||||
g->ops.fb.cbc_get_alignment(g, &base_divisor, &top_divisor);
|
||||
compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem);
|
||||
base_divisor = g->ops.cbc.get_base_divisor(g);
|
||||
compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor);
|
||||
|
||||
cbc_start_addr = (u64)nvgpu_ltc_get_ltc_count(g) *
|
||||
(compbit_store_base <<
|
||||
fb_mmu_cbc_base_address_alignment_shift_v());
|
||||
cbc_start_addr = compbit_store_base * base_divisor;
|
||||
cbc_end_addr = cbc_start_addr + cbc->compbit_backing_size;
|
||||
|
||||
cbc_top = (cbc_end_addr / nvgpu_ltc_get_ltc_count(g)) >>
|
||||
fb_mmu_cbc_base_address_alignment_shift_v();
|
||||
cbc_top = (cbc_end_addr / top_divisor);
|
||||
cbc_top_size = u64_lo32(cbc_top) - compbit_store_base;
|
||||
|
||||
nvgpu_assert(cbc_top_size < U64(U32_MAX));
|
||||
|
||||
@@ -31,6 +31,9 @@ struct nvgpu_mem;
|
||||
int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb);
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
struct nvgpu_cbc;
|
||||
|
||||
void tu104_fb_cbc_get_alignment(struct gk20a *g,
|
||||
u64 *base_divisor, u64 *top_divisor);
|
||||
void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
#endif
|
||||
int tu104_fb_apply_pdb_cache_errata(struct gk20a *g);
|
||||
|
||||
@@ -403,7 +403,6 @@ static const struct gops_cbc ga100_ops_cbc = {
|
||||
.cbc_init_support = nvgpu_cbc_init_support,
|
||||
.cbc_remove_support = nvgpu_cbc_remove_support,
|
||||
.init = tu104_cbc_init,
|
||||
.get_base_divisor = tu104_cbc_get_base_divisor,
|
||||
.alloc_comptags = ga100_cbc_alloc_comptags,
|
||||
.ctrl = tu104_cbc_ctrl,
|
||||
.fix_config = NULL,
|
||||
@@ -882,6 +881,7 @@ static const struct gops_fb ga100_ops_fb = {
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
.is_comptagline_mode_enabled = ga100_fb_is_comptagline_mode_enabled,
|
||||
.cbc_configure = tu104_fb_cbc_configure,
|
||||
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
|
||||
.set_use_full_comp_tag_line = gm20b_fb_set_use_full_comp_tag_line,
|
||||
.compression_page_size = ga100_fb_compression_page_size,
|
||||
.compressible_page_size = gp10b_fb_compressible_page_size,
|
||||
|
||||
@@ -366,7 +366,6 @@ static const struct gops_cbc ga10b_ops_cbc = {
|
||||
.cbc_init_support = nvgpu_cbc_init_support,
|
||||
.cbc_remove_support = nvgpu_cbc_remove_support,
|
||||
.init = ga10b_cbc_init,
|
||||
.get_base_divisor = ga10b_cbc_get_base_divisor,
|
||||
.alloc_comptags = ga10b_cbc_alloc_comptags,
|
||||
.ctrl = tu104_cbc_ctrl,
|
||||
};
|
||||
@@ -871,6 +870,7 @@ static const struct gops_fb ga10b_ops_fb = {
|
||||
.mmu_debug_rd = gm20b_fb_mmu_debug_rd,
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
.cbc_configure = ga10b_fb_cbc_configure,
|
||||
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
|
||||
.set_use_full_comp_tag_line = NULL,
|
||||
.compression_page_size = gp10b_fb_compression_page_size,
|
||||
.compressible_page_size = gp10b_fb_compressible_page_size,
|
||||
|
||||
@@ -341,7 +341,6 @@ static const struct gops_cbc tu104_ops_cbc = {
|
||||
.cbc_init_support = nvgpu_cbc_init_support,
|
||||
.cbc_remove_support = nvgpu_cbc_remove_support,
|
||||
.init = tu104_cbc_init,
|
||||
.get_base_divisor = tu104_cbc_get_base_divisor,
|
||||
.alloc_comptags = tu104_cbc_alloc_comptags,
|
||||
.ctrl = tu104_cbc_ctrl,
|
||||
.fix_config = NULL,
|
||||
@@ -797,6 +796,7 @@ static const struct gops_fb tu104_ops_fb = {
|
||||
.mmu_debug_rd = gm20b_fb_mmu_debug_rd,
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
.cbc_configure = tu104_fb_cbc_configure,
|
||||
.cbc_get_alignment = tu104_fb_cbc_get_alignment,
|
||||
.set_use_full_comp_tag_line = gm20b_fb_set_use_full_comp_tag_line,
|
||||
.compression_page_size = gp10b_fb_compression_page_size,
|
||||
.compressible_page_size = gp10b_fb_compressible_page_size,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -27,7 +27,6 @@ struct gops_cbc {
|
||||
int (*cbc_init_support)(struct gk20a *g);
|
||||
void (*cbc_remove_support)(struct gk20a *g);
|
||||
void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
u64 (*get_base_divisor)(struct gk20a *g);
|
||||
int (*alloc_comptags)(struct gk20a *g,
|
||||
struct nvgpu_cbc *cbc);
|
||||
int (*ctrl)(struct gk20a *g, enum nvgpu_cbc_op op,
|
||||
|
||||
@@ -440,6 +440,18 @@ struct gops_fb {
|
||||
(struct gk20a *g);
|
||||
#ifdef CONFIG_NVGPU_COMPRESSION
|
||||
void (*cbc_configure)(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
/**
|
||||
* @brief Get cbc base and top alignment specs.
|
||||
*
|
||||
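* @param g [in] Pointer to GPU driver struct.
|
||||
* @param base_divisor [out] Receives the CBC base divisor. May be NULL when not needed (the tu104 implementation skips NULL outputs).
|
||||
* @param top_divisor [out] Receives the CBC top divisor. May be NULL when not needed (the tu104 implementation skips NULL outputs).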
|
||||
*
|
||||
* The function calculates and returns required CBC base and top
|
||||
* alignment values.
|
||||
*
|
||||
* @return None.
|
||||
*/
|
||||
void (*cbc_get_alignment)(struct gk20a *g,
|
||||
u64 *base_divisor, u64 *top_divisor);
|
||||
bool (*set_use_full_comp_tag_line)(struct gk20a *g);
|
||||
|
||||
/*
|
||||
|
||||
@@ -687,13 +687,13 @@
|
||||
#define fb_mmu_num_active_ltcs_count_f(v) ((U32(v) & 0x1fU) << 0U)
|
||||
#define fb_mmu_num_active_ltcs_count_v(r) (((r) >> 0U) & 0x1fU)
|
||||
#define fb_mmu_cbc_base_r() (0x00100ec4U)
|
||||
#define fb_mmu_cbc_base_alignment_shift_v() (0x0000000bU)
|
||||
#define fb_mmu_cbc_base_address_f(v) ((U32(v) & 0x3ffffffU) << 0U)
|
||||
#define fb_mmu_cbc_base_address_v(r) (((r) >> 0U) & 0x3ffffffU)
|
||||
#define fb_mmu_cbc_base_address_alignment_shift_v() (0x0000000bU)
|
||||
#define fb_mmu_cbc_top_r() (0x00100ec8U)
|
||||
#define fb_mmu_cbc_top_alignment_shift_v() (0x0000000bU)
|
||||
#define fb_mmu_cbc_top_size_f(v) ((U32(v) & 0x7fffU) << 0U)
|
||||
#define fb_mmu_cbc_top_size_v(r) (((r) >> 0U) & 0x7fffU)
|
||||
#define fb_mmu_cbc_top_size_alignment_shift_v() (0x0000000bU)
|
||||
#define fb_mmu_cbc_max_r() (0x00100eccU)
|
||||
#define fb_mmu_cbc_max_comptagline_f(v) ((U32(v) & 0xffffffU) << 0U)
|
||||
#define fb_mmu_cbc_max_comptagline_m() (U32(0xffffffU) << 0U)
|
||||
|
||||
Reference in New Issue
Block a user