gpu: nvgpu: sim: make ring buffer independent of PAGE_SIZE

The simulator ring buffer DMA interface supports buffers of the following sizes:
4K, 8K, 12K, and 16K. At present, it is configured for 4K, which happens to match
the kernel PAGE_SIZE, and PAGE_SIZE is therefore used to wrap the GET/PUT pointers
back once 4K is reached. However, the two sizes do not always match; on a 64K-page
kernel, for instance, the pointers would not wrap until well past the 4K ring.
Hence, replace PAGE_SIZE with SIM_BFR_SIZE in the ring-pointer arithmetic.
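
An illustrative sketch of the change, taken from the rpc_send_message() hunk
below (SIM_BFR_SIZE is the 4K ring size introduced later in this patch):

    #define SIM_BFR_SIZE (SZ_4K)

    /* Before: wraps at the CPU page size, which overruns the 4K ring on 64K-page kernels. */
    g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32)) % PAGE_SIZE;

    /* After: wraps at the size the fmodel chiplib actually configures, independent of PAGE_SIZE. */
    g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32)) % SIM_BFR_SIZE;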

Also introduce the macro NVGPU_CPU_PAGE_SIZE as an alias for PAGE_SIZE and
replace the latter with the former throughout the driver.
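
A minimal sketch of the alias and a representative replacement, as they appear
in the hunks below:

    /* Alias so that uses of the CPU page size are explicit and easy to audit. */
    #define NVGPU_CPU_PAGE_SIZE PAGE_SIZE

    /* Typical mechanical replacement applied across the driver: */
    num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);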

Bug 200658101
Jira NVGPU-6018

Change-Id: I83cc62b87291734015c51f3e5a98173549e065de
Signed-off-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2420728
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Antony Clince Alex
2020-09-28 16:44:40 +05:30
committed by Alex Waterman
parent 09857ecd91
commit c36752fe3d
37 changed files with 120 additions and 103 deletions

View File

@@ -37,7 +37,7 @@ int nvgpu_userd_init_slabs(struct gk20a *g)
nvgpu_mutex_init(&f->userd_mutex);
f->num_channels_per_slab = PAGE_SIZE / g->ops.userd.entry_size(g);
f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g);
f->num_userd_slabs =
DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab);
@@ -88,7 +88,7 @@ int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
nvgpu_mutex_acquire(&f->userd_mutex);
if (!nvgpu_mem_is_valid(mem)) {
err = nvgpu_dma_alloc_sys(g, PAGE_SIZE, mem);
err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
if (err != 0) {
nvgpu_err(g, "userd allocation failed, err=%d", err);
goto done;
@@ -96,7 +96,7 @@ int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
if (g->ops.mm.is_bar1_supported(g)) {
mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem,
slab * PAGE_SIZE);
slab * NVGPU_CPU_PAGE_SIZE);
}
}
c->userd_mem = mem;
@@ -128,9 +128,9 @@ int nvgpu_userd_setup_sw(struct gk20a *g)
}
size = f->num_channels * g->ops.userd.entry_size(g);
num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);
err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
num_pages, PAGE_SIZE, &f->userd_gpu_va, 0);
num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0);
if (err != 0) {
nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
goto clean_up;

View File

@@ -328,8 +328,8 @@ static int gr_init_access_map(struct gk20a *g, struct nvgpu_gr *gr)
struct nvgpu_mem *mem;
u32 nr_pages =
DIV_ROUND_UP(NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP_SIZE,
PAGE_SIZE);
u32 nr_pages_size = nvgpu_safe_mult_u32(PAGE_SIZE, nr_pages);
NVGPU_CPU_PAGE_SIZE);
u32 nr_pages_size = nvgpu_safe_mult_u32(NVGPU_CPU_PAGE_SIZE, nr_pages);
#ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
u32 *whitelist = NULL;
u32 w, num_entries = 0U;

View File

@@ -529,7 +529,7 @@ static int nvgpu_init_syncpt_mem(struct gk20a *g)
if (nvgpu_has_syncpoints(g) && (g->syncpt_unit_size != 0UL)) {
if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
nr_pages = U64(DIV_ROUND_UP(g->syncpt_unit_size,
PAGE_SIZE));
NVGPU_CPU_PAGE_SIZE));
err = nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
g->syncpt_unit_base, nr_pages);
if (err != 0) {

View File

@@ -195,14 +195,14 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
* aligned. Although lower PDE tables can be aligned at 256B boundaries
* the PDB must be 4K aligned.
*
* Currently PAGE_SIZE is used, even when 64K, to work around an issue
* Currently NVGPU_CPU_PAGE_SIZE is used, even when 64K, to work around an issue
* with the PDB TLB invalidate code not being pd_cache aware yet.
*
* Similarly, we can't use nvgpu_pd_alloc() here, because the top-level
* PD must have mem_offs be 0 for the invalidate code to work, so we
* can't use the PD cache.
*/
pdb_size = ALIGN(pd_get_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
pdb_size = ALIGN(pd_get_size(&vm->mmu_levels[0], &attrs), NVGPU_CPU_PAGE_SIZE);
err = nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
if (err != 0) {

View File

@@ -171,7 +171,7 @@ int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
}
/*
* If bytes == PAGE_SIZE then it's impossible to get a discontiguous DMA
* If bytes == NVGPU_CPU_PAGE_SIZE then it's impossible to get a discontiguous DMA
* allocation. Some DMA implementations may, despite this fact, still
* use the contiguous pool for page sized allocations. As such only
* request explicitly contiguous allocs if the page directory is larger
@@ -180,7 +180,7 @@ int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
* going to be virtually contiguous and we don't have to force the
* underlying allocations to be physically contiguous as well.
*/
if (!nvgpu_iommuable(g) && (bytes > PAGE_SIZE)) {
if (!nvgpu_iommuable(g) && (bytes > NVGPU_CPU_PAGE_SIZE)) {
flags = NVGPU_DMA_PHYSICALLY_ADDRESSED;
}
@@ -218,7 +218,7 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
return -ENOMEM;
}
if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > PAGE_SIZE)) {
if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > NVGPU_CPU_PAGE_SIZE)) {
flags = NVGPU_DMA_PHYSICALLY_ADDRESSED;
}

View File

@@ -99,7 +99,7 @@
* PD cache size to be 64K if PAGE_SIZE > 4K (i.e PAGE_SIZE == 64K).
*/
#ifdef __KERNEL__
# if PAGE_SIZE > 4096
# if NVGPU_CPU_PAGE_SIZE > 4096
# define NVGPU_PD_CACHE_COUNT 8UL
# else
# define NVGPU_PD_CACHE_COUNT 4UL

View File

@@ -402,7 +402,8 @@ int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
(void) memset(dest, 0, sizeof(*dest));
dest->aperture = APERTURE_SYSMEM;
dest->size = nvgpu_safe_mult_u64(nr_pages, (u64)SZ_4K);
dest->size = nvgpu_safe_mult_u64(nr_pages,
(u64)NVGPU_CPU_PAGE_SIZE);
dest->aligned_size = dest->size;
dest->mem_flags = NVGPU_MEM_FLAG_NO_DMA;
dest->phys_sgt = sgt;

View File

@@ -373,7 +373,7 @@ static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
nvgpu_semaphore_sea_allocate_gpu_va(sema_sea, &vm->kernel,
nvgpu_safe_sub_u64(vm->va_limit,
mm->channel.kernel_size),
512U * PAGE_SIZE,
512U * NVGPU_CPU_PAGE_SIZE,
nvgpu_safe_cast_u64_to_u32(SZ_4K));
if (nvgpu_semaphore_sea_get_gpu_va(sema_sea) == 0ULL) {
nvgpu_free(&vm->kernel,
@@ -741,7 +741,7 @@ static int nvgpu_vm_init_attributes(struct mm_gk20a *mm,
nvgpu_safe_cast_u64_to_u32(SZ_4K);
vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG] = big_page_size;
vm->gmmu_page_sizes[GMMU_PAGE_SIZE_KERNEL] =
nvgpu_safe_cast_u64_to_u32(PAGE_SIZE);
nvgpu_safe_cast_u64_to_u32(NVGPU_CPU_PAGE_SIZE);
/* Set up vma pointers. */
vm->vma[GMMU_PAGE_SIZE_SMALL] = &vm->user;

View File

@@ -46,7 +46,7 @@ int nvgpu_hw_semaphore_init(struct vm_gk20a *vm, u32 chid,
/* Find an available HW semaphore. */
hw_sema_idx = semaphore_bitmap_alloc(p->semas_alloced,
PAGE_SIZE / SEMAPHORE_SIZE);
NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE);
if (hw_sema_idx < 0) {
ret = hw_sema_idx;
goto fail;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -244,7 +244,7 @@ u64 nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global)
return p->gpu_va;
}
return p->gpu_va_ro + (PAGE_SIZE * p->page_idx);
return p->gpu_va_ro + (NVGPU_CPU_PAGE_SIZE * p->page_idx);
}
/*

View File

@@ -93,7 +93,7 @@ struct nvgpu_semaphore_pool {
u64 gpu_va_ro; /* GPU access to the pool. */
u64 page_idx; /* Index into sea bitmap. */
DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE);
DECLARE_BITMAP(semas_alloced, NVGPU_CPU_PAGE_SIZE / SEMAPHORE_SIZE);
struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -50,21 +50,21 @@ static int semaphore_sea_grow(struct nvgpu_semaphore_sea *sea)
nvgpu_semaphore_sea_lock(sea);
ret = nvgpu_dma_alloc_sys(g,
PAGE_SIZE * SEMAPHORE_POOL_COUNT,
NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT,
&sea->sea_mem);
if (ret != 0) {
goto out;
}
sea->size = SEMAPHORE_POOL_COUNT;
sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE;
sea->map_size = SEMAPHORE_POOL_COUNT * NVGPU_CPU_PAGE_SIZE;
/*
* Start the semaphores at values that will soon overflow the 32-bit
* integer range. This way any buggy comparisons would start to fail
* sooner rather than later.
*/
for (i = 0U; i < PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) {
for (i = 0U; i < NVGPU_CPU_PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) {
nvgpu_mem_wr(g, &sea->sea_mem, i, 0xfffffff0U);
}

View File

@@ -36,7 +36,7 @@ int nvgpu_alloc_sim_buffer(struct gk20a *g, struct nvgpu_mem *mem)
int err = 0;
if (!nvgpu_mem_is_valid(mem)) {
err = nvgpu_dma_alloc_sys(g, PAGE_SIZE, mem);
err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
}
return err;
@@ -67,7 +67,7 @@ void nvgpu_remove_sim_support(struct gk20a *g)
void sim_write_hdr(struct gk20a *g, u32 func, u32 size)
{
/*memset(g->sim->msg_bfr.kvaddr,0,min(PAGE_SIZE,size));*/
/*memset(g->sim->msg_bfr.kvaddr,0,min(NVGPU_CPU_PAGE_SIZE,size));*/
*sim_msg_hdr(g, sim_msg_signature_r()) = sim_msg_signature_valid_v();
*sim_msg_hdr(g, sim_msg_result_r()) = sim_msg_result_rpc_pending_v();
*sim_msg_hdr(g, sim_msg_spare_r()) = sim_msg_spare__init_v();
@@ -104,7 +104,7 @@ static int rpc_send_message(struct gk20a *g)
*sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim->sequence_base++;
g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32))
% PAGE_SIZE;
% SIM_BFR_SIZE;
/* Update the put pointer. This will trap into the host. */
sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put);
@@ -156,7 +156,7 @@ static int rpc_recv_poll(struct gk20a *g)
/* Update GET pointer */
g->sim->recv_ring_get = (g->sim->recv_ring_get + 2*sizeof(u32))
% PAGE_SIZE;
% SIM_BFR_SIZE;
sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get);

View File

@@ -83,7 +83,7 @@ static int rpc_send_message(struct gk20a *g)
*sim_msg_hdr(g, sim_msg_sequence_r()) = g->sim->sequence_base++;
g->sim->send_ring_put = (g->sim->send_ring_put + 2 * sizeof(u32)) %
PAGE_SIZE;
SIM_BFR_SIZE;
/* Update the put pointer. This will trap into the host. */
sim_writel(g->sim, sim_send_put_r(), g->sim->send_ring_put);
@@ -131,7 +131,7 @@ static int rpc_recv_poll(struct gk20a *g)
/* Update GET pointer */
g->sim->recv_ring_get = (g->sim->recv_ring_get + 2*sizeof(u32))
% PAGE_SIZE;
% SIM_BFR_SIZE;
sim_writel(g->sim, sim_recv_get_r(), g->sim->recv_ring_get);

View File

@@ -118,7 +118,7 @@ int vgpu_init_fifo_setup_hw(struct gk20a *g)
nvgpu_log_fn(g, " ");
/* allocate and map first userd slab for bar1 test. */
err = nvgpu_dma_alloc_sys(g, PAGE_SIZE, mem);
err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
if (err != 0) {
nvgpu_err(g, "userd allocation failed, err=%d", err);
return err;

View File

@@ -191,7 +191,7 @@ int tu104_fb_apply_pdb_cache_war(struct gk20a *g)
/* Bind 256 instance blocks to unused engine ID 0x0 */
for (i = 0U; i < 256U; i++) {
inst_blk_addr = u64_lo32((inst_blk_base_addr +
(U64(i) * U64(PAGE_SIZE)))
(U64(i) * U64(NVGPU_CPU_PAGE_SIZE)))
>> fb_mmu_bind_imb_addr_alignment_v());
nvgpu_writel(g, fb_mmu_bind_imb_r(),
@@ -236,7 +236,7 @@ int tu104_fb_apply_pdb_cache_war(struct gk20a *g)
}
/* Bind 257th (last) instance block that reserves PDB cache entry 255 */
inst_blk_addr = u64_lo32((inst_blk_base_addr + (256ULL * U64(PAGE_SIZE)))
inst_blk_addr = u64_lo32((inst_blk_base_addr + (256ULL * U64(NVGPU_CPU_PAGE_SIZE)))
>> U64(fb_mmu_bind_imb_addr_alignment_v()));
nvgpu_writel(g, fb_mmu_bind_imb_r(),

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -32,7 +32,7 @@
int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
{
u32 size = PAGE_SIZE * 258U;
u32 size = NVGPU_CPU_PAGE_SIZE * 258U;
u64 last_bind_pdb_addr;
u64 pdb_addr;
u32 pdb_addr_lo, pdb_addr_hi;
@@ -57,9 +57,9 @@ int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
* valid memory
* First 256 binds can happen to dummy addresses
*/
pdb_addr = PAGE_SIZE;
pdb_addr = NVGPU_CPU_PAGE_SIZE;
last_bind_pdb_addr = nvgpu_mem_get_addr(g, &g->pdb_cache_war_mem) +
(257U * PAGE_SIZE);
(257U * NVGPU_CPU_PAGE_SIZE);
/* Setup first 256 instance blocks */
for (i = 0U; i < 256U; i++) {
@@ -67,7 +67,7 @@ int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
pdb_addr_hi = u64_hi32(pdb_addr);
nvgpu_mem_wr32(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_lo_w() + (i * PAGE_SIZE / 4U),
ram_in_page_dir_base_lo_w() + (i * NVGPU_CPU_PAGE_SIZE / 4U),
nvgpu_aperture_mask(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_sys_mem_coh_f(),
@@ -78,10 +78,10 @@ int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
ram_in_use_ver2_pt_format_true_f());
nvgpu_mem_wr32(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_hi_w() + (i * PAGE_SIZE / 4U),
ram_in_page_dir_base_hi_w() + (i * NVGPU_CPU_PAGE_SIZE / 4U),
ram_in_page_dir_base_hi_f(pdb_addr_hi));
pdb_addr += PAGE_SIZE;
pdb_addr += NVGPU_CPU_PAGE_SIZE;
}
/* Setup 257th instance block */
@@ -89,7 +89,7 @@ int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
pdb_addr_hi = u64_hi32(last_bind_pdb_addr);
nvgpu_mem_wr32(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_lo_w() + (256U * PAGE_SIZE / 4U),
ram_in_page_dir_base_lo_w() + (256U * NVGPU_CPU_PAGE_SIZE / 4U),
nvgpu_aperture_mask(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_sys_mem_coh_f(),
@@ -100,7 +100,7 @@ int tu104_ramin_init_pdb_cache_war(struct gk20a *g)
ram_in_use_ver2_pt_format_true_f());
nvgpu_mem_wr32(g, &g->pdb_cache_war_mem,
ram_in_page_dir_base_hi_w() + (256U * PAGE_SIZE / 4U),
ram_in_page_dir_base_hi_w() + (256U * NVGPU_CPU_PAGE_SIZE / 4U),
ram_in_page_dir_base_hi_f(pdb_addr_hi));
return 0;

View File

@@ -108,7 +108,7 @@ int gv11b_tsg_init_eng_method_buffers(struct gk20a *g, struct nvgpu_tsg *tsg)
int err = 0;
int i;
unsigned int runque, buffer_size;
u32 page_size = U32(PAGE_SIZE);
u32 page_size = U32(NVGPU_CPU_PAGE_SIZE);
unsigned int num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
if (tsg->eng_method_buffers != NULL) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -51,7 +51,7 @@ u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
u64 gpu_va = f->userd_gpu_va + offset;
return nvgpu_gmmu_map_fixed(g->mm.bar1.vm, mem, gpu_va,
PAGE_SIZE, 0,
NVGPU_CPU_PAGE_SIZE, 0,
gk20a_mem_flag_none, false,
mem->aperture);
}

View File

@@ -74,7 +74,7 @@ int gv11b_syncpt_alloc_buf(struct nvgpu_channel *c,
return err;
}
nr_pages = DIV_ROUND_UP(g->syncpt_size, PAGE_SIZE);
nr_pages = DIV_ROUND_UP(g->syncpt_size, NVGPU_CPU_PAGE_SIZE);
err = nvgpu_mem_create_from_phys(g, syncpt_buf,
nvgpu_safe_add_u64(g->syncpt_unit_base,
nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(g,

View File

@@ -239,7 +239,7 @@ static inline const char *nvgpu_gmmu_perm_str(enum gk20a_mem_rw_flag p)
* aligned. Although lower PDE tables can be aligned at 256B boundaries
* the PDB must be 4K aligned.
*
* Currently PAGE_SIZE is used, even when 64K, to work around an issue
* Currently NVGPU_CPU_PAGE_SIZE is used, even when 64K, to work around an issue
* with the PDB TLB invalidate code not being pd_cache aware yet.
*
* @return 0 in case of success.

View File

@@ -55,9 +55,9 @@ struct zcull_ctx_desc;
*/
/** Number of slots required in patch buffer per entry. */
#define PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY 2U
/** Number of slots per PAGE_SIZE. */
/** Number of slots per NVGPU_CPU_PAGE_SIZE. */
#define PATCH_CTX_SLOTS_PER_PAGE \
(PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32)))
(NVGPU_CPU_PAGE_SIZE/(PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY * (u32)sizeof(u32)))
/** Get number of entries in patch buffer given the size of buffer. */
#define PATCH_CTX_ENTRIES_FROM_SIZE(size) ((size)/sizeof(u32))

View File

@@ -32,6 +32,16 @@
#endif
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
/*
* Size of SIM ring buffers.
*
* Although each buffer is allocated with NVGPU_CPU_PAGE_SIZE bytes, the send and
* receive interfaces can only be configured to work with buffers of sizes
* 4K, 8K, 12K, and 16K. Furthermore, this size must match the size
* configured in the fmodel chiplib. At present, the agreed buffer size is 4K.
*/
#define SIM_BFR_SIZE (SZ_4K)
struct sim_nvgpu {
struct gk20a *g;
u32 send_ring_put;

View File

@@ -32,6 +32,12 @@
#include <nvgpu/posix/utils.h>
#endif
/*
* PAGE_SIZE is OS-specific and can vary across OSes. Depending on the OS, it may
* be defined as 4K or 64K.
*/
#define NVGPU_CPU_PAGE_SIZE PAGE_SIZE
/**
* @file
*

View File

@@ -200,7 +200,7 @@ int nvgpu_gr_fecs_trace_ring_alloc(struct gk20a *g,
{
struct nvgpu_ctxsw_ring_header *hdr;
*size = round_up(*size, PAGE_SIZE);
*size = round_up(*size, NVGPU_CPU_PAGE_SIZE);
hdr = vmalloc_user(*size);
if (!hdr)
return -ENOMEM;
@@ -531,7 +531,7 @@ int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
unsigned long vsize = vma->vm_end - vma->vm_start;
size = min(mmapsize, vsize);
size = round_up(size, PAGE_SIZE);
size = round_up(size, NVGPU_CPU_PAGE_SIZE);
ret = remap_pfn_range(vma, vma->vm_start,
(unsigned long) mmapaddr,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2019, NVIDIA Corporation. All rights reserved.
* Copyright (c) 2017-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -20,7 +20,7 @@
#include <nvgpu/types.h>
#define GK20A_CTXSW_TRACE_NUM_DEVS 1
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*NVGPU_CPU_PAGE_SIZE)
struct file;
struct inode;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -40,7 +40,7 @@ void *nvgpu_big_alloc_impl(struct gk20a *g, size_t size, bool clear)
{
void *p;
if (size > PAGE_SIZE) {
if (size > NVGPU_CPU_PAGE_SIZE) {
if (clear)
p = nvgpu_vzalloc(g, size);
else
@@ -314,12 +314,12 @@ static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
static void __nvgpu_check_valloc_size(unsigned long size)
{
WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
WARN(size < NVGPU_CPU_PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}
static void __nvgpu_check_kalloc_size(size_t size)
{
WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
WARN(size > NVGPU_CPU_PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}
void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
@@ -570,7 +570,7 @@ int nvgpu_kmem_init(struct gk20a *g)
nvgpu_mutex_init(&g->vmallocs->lock);
nvgpu_mutex_init(&g->kmallocs->lock);
g->vmallocs->min_alloc = PAGE_SIZE;
g->vmallocs->min_alloc = NVGPU_CPU_PAGE_SIZE;
g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
/*

View File

@@ -180,7 +180,7 @@ static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
struct page **pages;
int i = 0;
if (array_size <= PAGE_SIZE)
if (array_size <= NVGPU_CPU_PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
else
pages = vzalloc(array_size);
@@ -205,7 +205,7 @@ static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
pages[i + j] = pages[i] + j;
}
memset(page_address(pages[i]), 0, PAGE_SIZE << order);
memset(page_address(pages[i]), 0, NVGPU_CPU_PAGE_SIZE << order);
i += 1 << order;
count -= 1 << order;
@@ -216,7 +216,7 @@ static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
return (void *)pages;
error:
__nvgpu_dma_free_no_iommu(pages, i, array_size > PAGE_SIZE);
__nvgpu_dma_free_no_iommu(pages, i, array_size > NVGPU_CPU_PAGE_SIZE);
return NULL;
}
@@ -228,7 +228,7 @@ static void nvgpu_dma_free_no_iommu(size_t size, void *vaddr)
WARN_ON(!pages);
__nvgpu_dma_free_no_iommu(pages, count, array_size > PAGE_SIZE);
__nvgpu_dma_free_no_iommu(pages, count, array_size > NVGPU_CPU_PAGE_SIZE);
}
/* Check if IOMMU is available and if GPU uses it */
@@ -570,7 +570,7 @@ int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
}
err = sg_alloc_table_from_pages(tbl, pages,
DIV_ROUND_UP(size, PAGE_SIZE),
DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE),
0, size, GFP_KERNEL);
if (err)
goto fail;

View File

@@ -158,8 +158,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
u64 start_page, size_t nr_pages)
{
int ret;
u64 start = start_page * PAGE_SIZE;
u64 size = nr_pages * PAGE_SIZE;
u64 start = start_page * NVGPU_CPU_PAGE_SIZE;
u64 size = nr_pages * NVGPU_CPU_PAGE_SIZE;
dma_addr_t new_iova;
if (src->aperture != APERTURE_SYSMEM)
@@ -178,7 +178,7 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
/* Re-use the CPU mapping only if the mapping was made by the DMA API */
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
dest->cpu_va = src->cpu_va + (NVGPU_CPU_PAGE_SIZE * start_page);
dest->priv.pages = src->priv.pages + start_page;
dest->priv.flags = src->priv.flags;

View File

@@ -117,7 +117,7 @@ static ssize_t probed_gpus_show(struct device_driver *drv, char *buf)
ssize_t count = 0;
list_for_each_entry_safe(pp, tmp_pp, &nvgpu_pci_power_devs, list) {
count += snprintf(buf, PAGE_SIZE - count, "pci-%s\t%s\n",
count += snprintf(buf, NVGPU_CPU_PAGE_SIZE - count, "pci-%s\t%s\n",
pp->pci_dev_name,
pp->pci_dev ? "PoweredOn" : "PoweredOff");
}

View File

@@ -666,7 +666,7 @@ int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
return 0;
}
#if PAGE_SIZE > 4096
#if NVGPU_CPU_PAGE_SIZE > 4096
platform->secure_buffer_size += SZ_64K;
#endif
(void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,

View File

@@ -206,7 +206,7 @@ static int gp10b_tegra_probe(struct device *dev)
return ret;
platform->disable_bigpage = !iommu_get_domain_for_dev(dev) &&
(PAGE_SIZE < SZ_64K);
(NVGPU_CPU_PAGE_SIZE < SZ_64K);
#ifdef CONFIG_OF
of_chosen = of_find_node_by_path("/chosen");

View File

@@ -96,7 +96,7 @@ static int gv11b_tegra_probe(struct device *dev)
return err;
platform->disable_bigpage = !(iommu_get_domain_for_dev(dev)) &&
(PAGE_SIZE < SZ_64K);
(NVGPU_CPU_PAGE_SIZE < SZ_64K);
#ifdef CONFIG_OF
of_chosen = of_find_node_by_path("/chosen");

View File

@@ -79,7 +79,7 @@ static ssize_t elcg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->elcg_enabled ? 1 : 0);
}
static DEVICE_ATTR(elcg_enable, ROOTRW, elcg_enable_read, elcg_enable_store);
@@ -119,7 +119,7 @@ static ssize_t blcg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->blcg_enabled ? 1 : 0);
}
@@ -165,7 +165,7 @@ static ssize_t slcg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->slcg_enabled ? 1 : 0);
}
static DEVICE_ATTR(slcg_enable, ROOTRW, slcg_enable_read, slcg_enable_store);
@@ -189,7 +189,7 @@ static ssize_t ptimer_scale_factor_show(struct device *dev,
((u32)(src_freq_hz) /
(u32)(PTIMER_FP_FACTOR));
res = snprintf(buf,
PAGE_SIZE,
NVGPU_CPU_PAGE_SIZE,
"%u.%u\n",
scaling_factor_fp / PTIMER_FP_FACTOR,
scaling_factor_fp % PTIMER_FP_FACTOR);
@@ -217,7 +217,7 @@ static ssize_t ptimer_ref_freq_show(struct device *dev,
return -EINVAL;
}
res = snprintf(buf, PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ);
res = snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", PTIMER_REF_FREQ_HZ);
return res;
@@ -242,7 +242,7 @@ static ssize_t ptimer_src_freq_show(struct device *dev,
return -EINVAL;
}
res = snprintf(buf, PAGE_SIZE, "%u\n", src_freq_hz);
res = snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", src_freq_hz);
return res;
@@ -260,7 +260,7 @@ static ssize_t gpu_powered_on_show(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%s\n", nvgpu_get_power_state(g));
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%s\n", nvgpu_get_power_state(g));
}
static DEVICE_ATTR(gpu_powered_on, S_IRUGO, gpu_powered_on_show, NULL);
@@ -318,7 +318,7 @@ static ssize_t railgate_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n",
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n",
nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ? 1 : 0);
}
@@ -360,7 +360,7 @@ static ssize_t railgate_delay_show(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->railgate_delay);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->railgate_delay);
}
static DEVICE_ATTR(railgate_delay, ROOTRW, railgate_delay_show,
railgate_delay_store);
@@ -374,7 +374,7 @@ static ssize_t is_railgated_show(struct device *dev,
if (platform->is_railgated)
is_railgated = platform->is_railgated(dev);
return snprintf(buf, PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no");
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%s\n", is_railgated ? "yes" : "no");
}
static DEVICE_ATTR(is_railgated, S_IRUGO, is_railgated_show, NULL);
@@ -387,7 +387,7 @@ static ssize_t counters_show(struct device *dev,
nvgpu_pmu_get_load_counters(g, &busy_cycles, &total_cycles);
res = snprintf(buf, PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
res = snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u %u\n", busy_cycles, total_cycles);
return res;
}
@@ -427,7 +427,7 @@ static ssize_t gk20a_load_show(struct device *dev,
gk20a_idle(g);
}
res = snprintf(buf, PAGE_SIZE, "%u\n", busy_time);
res = snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", busy_time);
return res;
}
@@ -468,7 +468,7 @@ static ssize_t elpg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n",
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n",
nvgpu_pg_elpg_is_enabled(g) ? 1 : 0);
}
@@ -521,7 +521,7 @@ static ssize_t ldiv_slowdown_factor_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->ldiv_slowdown_factor);
}
static DEVICE_ATTR(ldiv_slowdown_factor, ROOTRW,
@@ -588,7 +588,7 @@ static ssize_t mscg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->mscg_enabled ? 1 : 0);
}
static DEVICE_ATTR(mscg_enable, ROOTRW, mscg_enable_read, mscg_enable_store);
@@ -641,7 +641,7 @@ static ssize_t aelpg_param_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE,
return snprintf(buf, NVGPU_CPU_PAGE_SIZE,
"%d %d %d %d %d\n", g->pmu->pg->aelpg_param[0],
g->pmu->pg->aelpg_param[1], g->pmu->pg->aelpg_param[2],
g->pmu->pg->aelpg_param[3], g->pmu->pg->aelpg_param[4]);
@@ -697,7 +697,7 @@ static ssize_t aelpg_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->aelpg_enabled ? 1 : 0);
}
static DEVICE_ATTR(aelpg_enable, ROOTRW,
@@ -709,7 +709,7 @@ static ssize_t allow_all_enable_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->allow_all ? 1 : 0);
}
static ssize_t allow_all_enable_store(struct device *dev,
@@ -751,7 +751,7 @@ static ssize_t emc3d_ratio_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->emc3d_ratio);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->emc3d_ratio);
}
static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
@@ -765,7 +765,7 @@ static ssize_t fmax_at_vmin_safe_read(struct device *dev,
if (g->ops.clk.get_fmax_at_vmin_safe)
gpu_fmax_at_vmin_hz = g->ops.clk.get_fmax_at_vmin_safe(g);
return snprintf(buf, PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz));
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", (int)(gpu_fmax_at_vmin_hz));
}
static DEVICE_ATTR(fmax_at_vmin_safe, S_IRUGO, fmax_at_vmin_safe_read, NULL);
@@ -813,7 +813,7 @@ static ssize_t force_idle_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->forced_idle ? 1 : 0);
}
static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
@@ -824,7 +824,7 @@ static ssize_t tpc_pg_mask_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%d\n", g->tpc_pg_mask);
}
static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask)
@@ -963,7 +963,7 @@ static ssize_t tpc_fs_mask_read(struct device *dev,
gk20a_idle(g);
return snprintf(buf, PAGE_SIZE, "0x%x\n", tpc_fs_mask);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "0x%x\n", tpc_fs_mask);
}
static DEVICE_ATTR(tpc_fs_mask, ROOTRW, tpc_fs_mask_read, tpc_fs_mask_store);
@@ -973,7 +973,7 @@ static ssize_t tsg_timeslice_min_us_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", g->tsg_timeslice_min_us);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", g->tsg_timeslice_min_us);
}
static ssize_t tsg_timeslice_min_us_store(struct device *dev,
@@ -1001,7 +1001,7 @@ static ssize_t tsg_timeslice_max_us_read(struct device *dev,
{
struct gk20a *g = get_gk20a(dev);
return snprintf(buf, PAGE_SIZE, "%u\n", g->tsg_timeslice_max_us);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", g->tsg_timeslice_max_us);
}
static ssize_t tsg_timeslice_max_us_store(struct device *dev,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -37,7 +37,7 @@ static ssize_t vgpu_load_show(struct device *dev,
if (err)
return err;
return snprintf(buf, PAGE_SIZE, "%u\n", p->load);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", p->load);
}
static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
@@ -64,7 +64,7 @@ static ssize_t vgpu_ecc_stat_show(struct device *dev,
return err;
}
return snprintf(buf, PAGE_SIZE, "%u\n", p->value);
return snprintf(buf, NVGPU_CPU_PAGE_SIZE, "%u\n", p->value);
}
static int vgpu_create_ecc_sysfs(struct device *dev)

View File

@@ -111,7 +111,7 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
g->railgate_delay = platform->railgate_delay_init;
g->mm.disable_bigpage = PAGE_SIZE < SZ_64K;
g->mm.disable_bigpage = NVGPU_CPU_PAGE_SIZE < SZ_64K;
nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
platform->unified_memory);
nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -298,8 +298,8 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
struct nvgpu_mem *dest, struct nvgpu_mem *src,
u64 start_page, size_t nr_pages)
{
u64 start = start_page * U64(PAGE_SIZE);
u64 size = U64(nr_pages) * U64(PAGE_SIZE);
u64 start = start_page * U64(NVGPU_CPU_PAGE_SIZE);
u64 size = U64(nr_pages) * U64(NVGPU_CPU_PAGE_SIZE);
if (src->aperture != APERTURE_SYSMEM) {
return -EINVAL;