gpu: nvgpu: perfbuf: update PMA buffer mapping

The PMA unit can only access GPU VAs within a 4GB window, so both
the user-allocated PMA buffer and the kernel-allocated bytes-available
buffer must lie in the same 4GB window. This is accomplished by
carving out and reserving a 4GB VA range in perfbuf.vm and binding
both buffers at fixed GPU VAs inside that window.

In addition, update the ALLOC_PMA_STREAM ioctl to use the
pma_buffer_offset and pma_buffer_map_size fields correctly.
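
Illustration (not part of the driver): a minimal C model of the
same-window invariant this change enforces, with nvgpu's u64_hi32()
helper written out as a plain shift and the example VAs chosen
arbitrarily:

#include <stdbool.h>
#include <stdint.h>

/* Two GPU VAs are usable by the PMA unit together only if they share
 * the same 4GB window, i.e. the same upper 32 address bits. */
static bool same_4gb_window(uint64_t va_a, uint64_t va_b)
{
	return (va_a >> 32) == (va_b >> 32);
}

/*
 * same_4gb_window(0x100000000, 0x1FFE00000) -> true  (same window)
 * same_4gb_window(0x1FFF00000, 0x200000000) -> false (crosses a 4GB boundary)
 */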

Bug 3503708

Change-Id: Ic5297a22c2db42b18ff5e676d565d3be3c1cd780
Signed-off-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2671637
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
Author:    Antony Clince Alex <aalex@nvidia.com>
Date:      2022-02-23 20:26:59 +00:00
Committer: mobile promotions
Commit:    c0f4723339 (parent e7c082aa66)
7 changed files with 66 additions and 33 deletions

File 1 of 7:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -28,9 +28,6 @@
 #include <nvgpu/vm_area.h>
 #include <nvgpu/utils.h>
 
-#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA	0x4000000ULL
-#define PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE	NVGPU_CPU_PAGE_SIZE
-
 int nvgpu_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 {
 	int err;
@@ -100,22 +97,38 @@ int nvgpu_perfbuf_init_vm(struct gk20a *g)
 	}
 
 	/*
-	 * PMA available byte buffer GPU_VA needs to fit in 32 bit
-	 * register, hence use a fixed GPU_VA to map it.
-	 * Only one PMA stream is allowed right now so this works.
-	 * This should be updated later to support multiple PMA streams.
+	 * The PMA unit can only access GPU VAs within a 4GB window which
+	 * includes PMA_BUF + PMA_AVAILABLE_BYTES_BUF, hence carve out and
+	 * reserve a 4GB window from the perfbuf.vm VA space and use VAs
+	 * from this window while binding the buffers.
 	 */
-	mm->perfbuf.pma_bytes_available_buffer_gpu_va =
-			PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_FIXED_GPU_VA;
+	mm->perfbuf.pma_buffer_gpu_va = 0;
 	err = nvgpu_vm_area_alloc(mm->perfbuf.vm,
-			PERFBUF_PMA_BYTES_AVAILABLE_BUFFER_MAX_SIZE / SZ_4K,
-			SZ_4K, &mm->perfbuf.pma_bytes_available_buffer_gpu_va,
-			NVGPU_VM_AREA_ALLOC_FIXED_OFFSET);
+			PERFBUF_PMA_MEM_WINDOW_SIZE / SZ_4K,
+			SZ_4K, &mm->perfbuf.pma_buffer_gpu_va, 0);
 	if (err != 0) {
 		nvgpu_vm_put(mm->perfbuf.vm);
 		return err;
 	}
 
+	mm->perfbuf.pma_bytes_available_buffer_gpu_va = nvgpu_safe_add_u64(
+			mm->perfbuf.pma_buffer_gpu_va,
+			PERFBUF_PMA_BUF_MAX_SIZE);
+	if (u64_hi32(mm->perfbuf.pma_bytes_available_buffer_gpu_va) !=
+			u64_hi32(mm->perfbuf.pma_buffer_gpu_va)) {
+		nvgpu_err(g, "perfbuf: 0x%llx, 0x%llx, crosses 4GB boundary",
+				mm->perfbuf.pma_buffer_gpu_va,
+				mm->perfbuf.pma_bytes_available_buffer_gpu_va);
+		nvgpu_vm_area_free(mm->perfbuf.vm,
+				mm->perfbuf.pma_buffer_gpu_va);
+		nvgpu_vm_put(mm->perfbuf.vm);
+		return -ENOMEM;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof, "perfbuf: 0x%llx, 0x%llx",
+			mm->perfbuf.pma_buffer_gpu_va,
+			mm->perfbuf.pma_bytes_available_buffer_gpu_va);
+
 	err = g->ops.perfbuf.init_inst_block(g);
 	if (err != 0) {
@@ -138,8 +151,7 @@ void nvgpu_perfbuf_deinit_vm(struct gk20a *g)
 	g->ops.perfbuf.deinit_inst_block(g);
-	nvgpu_vm_area_free(mm->perfbuf.vm,
-			mm->perfbuf.pma_bytes_available_buffer_gpu_va);
+	nvgpu_vm_area_free(mm->perfbuf.vm, mm->perfbuf.pma_buffer_gpu_va);
 	nvgpu_vm_put(g->mm.perfbuf.vm);
 }
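
For reference, a compilable sketch of the layout these hunks establish
inside the reserved window (the struct and function names here are
illustrative, not from the driver): the PMA buffer is bound at the
window base and the bytes-available buffer right after the largest
allowed PMA buffer.

#include <errno.h>
#include <stdint.h>

#define PERFBUF_PMA_BUF_MAX_SIZE	0xFFE00000ULL

struct perfbuf_layout {
	uint64_t pma_buffer_gpu_va;
	uint64_t pma_bytes_available_buffer_gpu_va;
};

static int perfbuf_layout_window(uint64_t window_base,
				 struct perfbuf_layout *l)
{
	l->pma_buffer_gpu_va = window_base;
	l->pma_bytes_available_buffer_gpu_va =
			window_base + PERFBUF_PMA_BUF_MAX_SIZE;

	/* Mirrors the u64_hi32() comparison in the diff: if the two VAs
	 * land in different 4GB windows, the driver unwinds the VA area
	 * and fails init with -ENOMEM. */
	if ((l->pma_buffer_gpu_va >> 32) !=
	    (l->pma_bytes_available_buffer_gpu_va >> 32))
		return -ENOMEM;
	return 0;
}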

File 2 of 7:

@@ -27,6 +27,7 @@
 #include <nvgpu/gr/config.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/utils.h>
 
 #include "perf_ga10b.h"
@@ -401,6 +402,12 @@ void ga10b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr)
 	nvgpu_assert(perf_pmasys_channel_mem_bytes_addr__size_1_v() ==
 			pmasys_channel_instance_max_size);
 
+	/*
+	 * For the mem bytes addr, the upper 8 bits of the 40-bit VA are
+	 * taken from perf_pmasys_channel_outbaseupper_r(), so only consider
+	 * the lower 32 bits of buf_addr and discard the rest.
+	 */
+	buf_addr = u64_lo32(buf_addr);
 	buf_addr = buf_addr >> perf_pmasys_channel_mem_bytes_addr_ptr_b();
 	addr_lo = nvgpu_safe_cast_u64_to_u32(buf_addr);
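
This hunk and the identical gv11b hunk below add the same narrowing
step. A standalone model of the address split, where the field shift is
an assumed placeholder (the real value comes from the generated
perf_pmasys hw headers):

#include <stdint.h>

/* Assumed placeholder: the real shift is
 * perf_pmasys_channel_mem_bytes_addr_ptr_b() from the hw headers. */
#define MEM_BYTES_ADDR_PTR_SHIFT	6U

/* The addr register only holds the low 32 bits of the 40-bit GPU VA
 * (shifted down by the ptr field offset); the upper 8 bits are
 * programmed via perf_pmasys_channel_outbaseupper_r(). */
static uint32_t mem_bytes_addr_lo(uint64_t buf_addr)
{
	buf_addr = buf_addr & 0xFFFFFFFFULL;	/* u64_lo32(): keep low 32 bits */
	return (uint32_t)(buf_addr >> MEM_BYTES_ADDR_PTR_SHIFT);
}

static uint32_t mem_bytes_addr_hi(uint64_t buf_addr)
{
	return (uint32_t)((buf_addr >> 32) & 0xFFU);	/* VA bits 39:32 */
}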

File 3 of 7:

@@ -27,6 +27,7 @@
 #include <nvgpu/gr/config.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/utils.h>
 
 #include "perf_gv11b.h"
@@ -97,6 +98,12 @@ void gv11b_perf_bind_mem_bytes_buffer_addr(struct gk20a *g, u64 buf_addr)
 {
 	u32 addr_lo;
 
+	/*
+	 * For the mem bytes addr, the upper 8 bits of the 40-bit VA are
+	 * taken from perf_pmasys_channel_outbaseupper_r(), so only consider
+	 * the lower 32 bits of buf_addr and discard the rest.
+	 */
+	buf_addr = u64_lo32(buf_addr);
 	buf_addr = buf_addr >> perf_pmasys_mem_bytes_addr_ptr_b();
 	addr_lo = nvgpu_safe_cast_u64_to_u32(buf_addr);

File 4 of 7:

@@ -296,6 +296,7 @@ struct mm_gk20a {
 		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 		u64 pma_bytes_available_buffer_gpu_va;
+		u64 pma_buffer_gpu_va;
 	} perfbuf;
 
 	/**

File 5 of 7:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -25,6 +25,14 @@
 #ifdef CONFIG_NVGPU_DEBUGGER
 
+/*
+ * Size of the GPU VA window that the PMA unit is allowed to access.
+ */
+#define PERFBUF_PMA_MEM_WINDOW_SIZE		SZ_4G
+#define PERFBUF_PMA_BUF_MAX_SIZE		0xFFE00000ULL
+#define PMA_BYTES_AVAILABLE_BUFFER_SIZE		SZ_4K
+
 #include <nvgpu/types.h>
 
 struct gk20a;
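
As a sanity check on these constants (spelled out in hex here, since
SZ_4G and SZ_4K come from kernel headers): PERFBUF_PMA_BUF_MAX_SIZE is
4GB minus 2MB, so even with the 4KB bytes-available buffer placed
directly after it, both mappings stay inside the 4GB window. A
compile-time sketch of that check, not present in the actual header:

#define PERFBUF_PMA_MEM_WINDOW_SIZE		0x100000000ULL	/* SZ_4G */
#define PERFBUF_PMA_BUF_MAX_SIZE		0xFFE00000ULL	/* 4GB - 2MB */
#define PMA_BYTES_AVAILABLE_BUFFER_SIZE		0x1000ULL	/* SZ_4K */

/* 0xFFE00000 + 0x1000 = 0xFFE01000 < 0x100000000: the PMA buffer and
 * the bytes-available buffer, placed back to back at the window base,
 * always fit inside the reserved 4GB window. */
_Static_assert(PERFBUF_PMA_BUF_MAX_SIZE + PMA_BYTES_AVAILABLE_BUFFER_SIZE
		< PERFBUF_PMA_MEM_WINDOW_SIZE,
		"PMA buffers must fit inside the PMA window");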

File 6 of 7:

@@ -1583,10 +1583,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		goto err_release_pma;
 	}
 
+	args->offset = mm->perfbuf.pma_buffer_gpu_va;
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
-			0,
+			NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET,
 			SZ_4K,
 			0,
 			0,

File 7 of 7:

@@ -363,6 +363,7 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 	struct gk20a *g = prof->g;
 	struct mm_gk20a *mm = &g->mm;
 	u64 pma_bytes_available_buffer_offset;
+	u64 pma_buffer_offset;
 	struct dma_buf *pma_dmabuf;
 	struct dma_buf *pma_bytes_available_dmabuf;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
@@ -386,27 +387,20 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 		return err;
 	}
 
-	/*
-	 * PMA available byte buffer GPU_VA needs to fit in 32 bit
-	 * register, hence use a fixed GPU_VA to map it.
-	 */
 	pma_bytes_available_buffer_offset = mm->perfbuf.pma_bytes_available_buffer_gpu_va;
-	err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd,
+	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
+			args->pma_bytes_available_buffer_fd,
 			&pma_bytes_available_buffer_offset,
 			NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET, SZ_4K, 0, 0,
-			0, 0, NULL);
+			0, PMA_BYTES_AVAILABLE_BUFFER_SIZE, NULL);
 	if (err != 0) {
 		nvgpu_err(g, "failed to map available bytes buffer");
 		goto err_put_vm;
 	}
 
-	/*
-	 * Size register is 32-bit in HW, ensure requested size does
-	 * not violate that.
-	 */
-	if (args->pma_buffer_map_size >= (1ULL << 32U)) {
-		nvgpu_err(g, "pma_buffer_map_size does not fit in 32 bits");
+	if (args->pma_buffer_map_size > PERFBUF_PMA_BUF_MAX_SIZE) {
+		nvgpu_err(g, "pma_buffer_map_size exceeds max size");
 		goto err_unmap_bytes_available;
 	}
 
 	pma_buffer_size = nvgpu_safe_cast_u64_to_u32(args->pma_buffer_map_size);
@@ -426,9 +420,12 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 		goto err_dma_buf_put_pma;
 	}
 
+	pma_buffer_offset = mm->perfbuf.pma_buffer_gpu_va;
 	err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_buffer_fd,
-			&args->pma_buffer_offset, 0, SZ_4K, 0, 0,
-			0, 0, NULL);
+			&pma_buffer_offset,
+			NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET, SZ_4K, 0, 0,
+			args->pma_buffer_offset,
+			args->pma_buffer_map_size, NULL);
 	if (err != 0) {
 		nvgpu_err(g, "failed to map PMA buffer");
 		goto err_dma_buf_put_pma;
@@ -453,7 +450,7 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 		goto err_dma_buf_put_pma_bytes_available;
 	}
 
-	prof->pma_buffer_va = args->pma_buffer_offset;
+	prof->pma_buffer_va = pma_buffer_offset;
 	prof->pma_buffer_size = pma_buffer_size;
 	prof->pma_bytes_available_buffer_va = pma_bytes_available_buffer_offset;
 	prof->pma_bytes_available_buffer_cpuva = cpuva;
@@ -463,7 +460,7 @@ static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *
 			prof->prof_handle, prof->pma_buffer_va, prof->pma_buffer_size,
 			prof->pma_bytes_available_buffer_va);
 
-	args->pma_buffer_va = args->pma_buffer_offset;
+	args->pma_buffer_va = pma_buffer_offset;
 
 	/* Decrement pma_dmabuf ref count as we already mapped it. */
 	dma_buf_put(pma_dmabuf);
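
To summarize the userspace-visible effect of the ALLOC_PMA_STREAM
changes, a hedged sketch of the argument flow. This struct is a
hypothetical mirror of the uapi reduced to the fields touched by this
commit; the real layout lives in the nvgpu uapi headers.

#include <stdint.h>
#include <string.h>

struct alloc_pma_stream_args {
	int32_t  pma_buffer_fd;                 /* in:  dmabuf of the PMA buffer */
	int32_t  pma_bytes_available_buffer_fd; /* in:  dmabuf of the bytes-available buffer */
	uint64_t pma_buffer_offset;             /* in:  offset of the mapping within the dmabuf */
	uint64_t pma_buffer_map_size;           /* in:  bytes to map, at most 0xFFE00000 */
	uint64_t pma_buffer_va;                 /* out: fixed GPU VA of the PMA buffer mapping */
};

/* Example request after this change: the offset and size select a
 * sub-range of the dmabuf, while the GPU VA is chosen by the driver
 * (the base of the reserved 4GB window). */
static void fill_alloc_pma_stream(struct alloc_pma_stream_args *args,
				  int32_t pma_fd, int32_t avail_fd)
{
	memset(args, 0, sizeof(*args));
	args->pma_buffer_fd = pma_fd;
	args->pma_bytes_available_buffer_fd = avail_fd;
	args->pma_buffer_offset = 0;		/* map from the start of the dmabuf */
	args->pma_buffer_map_size = 0x1000000;	/* 16MB, within the max */
	/* On success the driver returns the fixed GPU VA in pma_buffer_va. */
}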