mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching essentially helps transform some per-map/unmap overhead to per-batch overhead, namely gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates. Batching with size 64 has been measured to yield >20x speed-up in low-level fixed-address mapping microbenchmarks. Bug 1614735 Bug 1623949 Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085 Signed-off-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-on: http://git-master/r/733231 (cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91) Reviewed-on: http://git-master/r/763812 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
committed by Terje Bergstrom
parent ae7b988b0d
commit e7ba93fefb
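The commit message describes moving gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates from per-map/unmap overhead to per-batch overhead. The following is a minimal standalone sketch of that idea only; every name in it (struct mapping_batch, unmap_buffer(), batch_finish(), gpu_tlb_invalidate()) is a hypothetical stand-in for illustration and is not taken from the driver, which this page only shows through the new trailing batch argument in the hunks below.

/*
 * Standalone sketch only: all names are hypothetical stand-ins, not
 * nvgpu code. It shows how a batch object defers the expensive TLB
 * invalidate so 64 unmaps pay for it once instead of 64 times.
 */
#include <stdbool.h>
#include <stdio.h>

struct mapping_batch {
	bool need_tlb_invalidate;	/* set by deferred unmaps */
};

static int tlb_invalidates;		/* count the expensive operation */

static void gpu_tlb_invalidate(void)
{
	tlb_invalidates++;
}

/* Unmap one buffer: invalidate immediately when batch is NULL,
 * otherwise just record that the batch owes one invalidate. */
static void unmap_buffer(int handle, struct mapping_batch *batch)
{
	(void)handle;			/* page-table teardown elided */
	if (batch)
		batch->need_tlb_invalidate = true;
	else
		gpu_tlb_invalidate();
}

static void batch_finish(struct mapping_batch *batch)
{
	if (batch->need_tlb_invalidate)
		gpu_tlb_invalidate();	/* once per batch */
}

int main(void)
{
	struct mapping_batch batch = { .need_tlb_invalidate = false };
	int i;

	for (i = 0; i < 64; i++)
		unmap_buffer(i, &batch);	/* deferred */
	batch_finish(&batch);

	printf("TLB invalidates for 64 batched unmaps: %d\n",
	       tlb_invalidates);
	return 0;
}

With batch == NULL every unmap_buffer() call in this sketch would invalidate immediately, which corresponds to the pre-batch behaviour that the existing call sites in the diff below keep.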
@@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			0,
 			0,
 			0,
-			args->mapping_size);
+			args->mapping_size,
+			NULL);
 	if (err)
 		return err;
 
@@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	virt_addr_hi = u64_hi32(args->offset);
 	/* but check anyway */
 	if (args->offset + virt_size > SZ_4G) {
-		gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+		gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
 		return -EINVAL;
 	}
 
@@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
 			perf_pmasys_mem_block_valid_false_f() |
 			perf_pmasys_mem_block_target_f(0));
 
-	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+	gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
 
 	return 0;
 }
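All three hunks above change existing one-off call sites to pass NULL for the new batch parameter, which preserves their immediate flush/invalidate behaviour. A rough caller-side contrast follows, reusing the hypothetical stand-ins from the sketch earlier on this page (none of these names are taken from the driver):

/* Hypothetical caller-side contrast; struct mapping_batch,
 * unmap_buffer() and batch_finish() are the stand-ins defined in the
 * earlier sketch, not driver functions. */
static void unmap_one(unsigned long long offset)
{
	/* Unbatched, like the perfbuf hunks: flush/invalidate now. */
	unmap_buffer((int)offset, NULL);
}

static void unmap_many(const unsigned long long *offsets, int n)
{
	struct mapping_batch batch = { .need_tlb_invalidate = false };
	int i;

	for (i = 0; i < n; i++)
		unmap_buffer((int)offsets[i], &batch);	/* defer */
	batch_finish(&batch);				/* settle once per batch */
}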