Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Initial MAP_BUFFER_BATCH implementation
Add batch support for mapping and unmapping. Batching turns some
per-map/unmap overhead into per-batch overhead, namely the
gk20a_busy()/gk20a_idle() calls, GPU L2 flushes, and GPU TLB invalidates.
Batching with size 64 has been measured to yield >20x speed-up in
low-level fixed-address mapping microbenchmarks.

Bug 1614735
Bug 1623949

Change-Id: Ie22b9caea5a7c3fc68a968d1b7f8488dfce72085
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/733231
(cherry picked from commit de4a7cfb93e8228a4a0c6a2815755a8df4531c91)
Reviewed-on: http://git-master/r/763812
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: Terje Bergstrom
Parent: ae7b988b0d
Commit: e7ba93fefb
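To make the new interface concrete, here is a hypothetical userspace sketch (not part of the commit) of how a client might drive NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, based on the uapi additions in the diff below. The header path, fd values and GPU virtual addresses are placeholders, and NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET is assumed to be the pre-existing fixed-offset mapping flag; num_unmaps and num_maps must not exceed gpu_characteristics.map_buffer_batch_limit, which this change sets to 256.

/*
 * Hypothetical usage sketch: batch two unmaps and two fixed-offset maps
 * into a single ioctl on an nvgpu address-space fd. Values are placeholders.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>    /* assumed install path of the nvgpu uapi header */

static int batch_remap(int as_fd, const int dmabuf_fd[2],
                       const uint64_t gpu_va[2])
{
        struct nvgpu_as_unmap_buffer_args unmaps[2];
        struct nvgpu_as_map_buffer_ex_args maps[2];
        struct nvgpu_as_map_buffer_batch_args batch;
        int i;

        memset(unmaps, 0, sizeof(unmaps));
        memset(maps, 0, sizeof(maps));
        memset(&batch, 0, sizeof(batch));

        for (i = 0; i < 2; i++) {
                unmaps[i].offset = gpu_va[i];       /* GPU VA to unmap */
                maps[i].dmabuf_fd = dmabuf_fd[i];   /* buffer to map */
                maps[i].flags = NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET;
                maps[i].offset = gpu_va[i];         /* reuse the same VA */
                /* buffer_offset/mapping_size left 0, as in the non-batch path */
        }

        batch.unmaps = (uintptr_t)unmaps;   /* all unmaps are processed first */
        batch.maps = (uintptr_t)maps;       /* then all maps */
        batch.num_unmaps = 2;               /* both counts must stay within */
        batch.num_maps = 2;                 /* map_buffer_batch_limit */

        /* One ioctl amortizes gk20a_busy/idle, L2 flush and TLB invalidate. */
        return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &batch);
}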
@@ -151,8 +151,8 @@ static int gk20a_as_ioctl_map_buffer_ex(
                                 &args->offset, args->flags,
                                 args->kind,
                                 args->buffer_offset,
-                                args->mapping_size
-                                );
+                                args->mapping_size,
+                                NULL);
 }
 
 static int gk20a_as_ioctl_map_buffer(
@@ -163,7 +163,7 @@ static int gk20a_as_ioctl_map_buffer(
        return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd,
                                   &args->o_a.offset,
                                   args->flags, NV_KIND_DEFAULT,
-                                  0, 0);
+                                  0, 0, NULL);
        /* args->o_a.offset will be set if !err */
 }
 
@@ -172,7 +172,86 @@ static int gk20a_as_ioctl_unmap_buffer(
                struct nvgpu_as_unmap_buffer_args *args)
 {
        gk20a_dbg_fn("");
-       return gk20a_vm_unmap_buffer(as_share->vm, args->offset);
+       return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL);
+}
+
+static int gk20a_as_ioctl_map_buffer_batch(
+       struct gk20a_as_share *as_share,
+       struct nvgpu_as_map_buffer_batch_args *args)
+{
+       struct gk20a *g = as_share->vm->mm->g;
+       u32 i;
+       int err = 0;
+
+       struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
+               (struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
+               args->unmaps;
+       struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
+               (struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
+               args->maps;
+
+       struct vm_gk20a_mapping_batch batch;
+
+       gk20a_dbg_fn("");
+
+       if (args->num_unmaps > g->gpu_characteristics.map_buffer_batch_limit ||
+           args->num_maps > g->gpu_characteristics.map_buffer_batch_limit)
+               return -EINVAL;
+
+       gk20a_vm_mapping_batch_start(&batch);
+
+       for (i = 0; i < args->num_unmaps; ++i) {
+               struct nvgpu_as_unmap_buffer_args unmap_args;
+
+               if (copy_from_user(&unmap_args, &user_unmap_args[i],
+                                  sizeof(unmap_args))) {
+                       err = -EFAULT;
+                       break;
+               }
+
+               err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset,
+                                           &batch);
+               if (err)
+                       break;
+       }
+
+       if (err) {
+               gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+               args->num_unmaps = i;
+               args->num_maps = 0;
+               return err;
+       }
+
+       for (i = 0; i < args->num_maps; ++i) {
+               struct nvgpu_as_map_buffer_ex_args map_args;
+               memset(&map_args, 0, sizeof(map_args));
+
+               if (copy_from_user(&map_args, &user_map_args[i],
+                                  sizeof(map_args))) {
+                       err = -EFAULT;
+                       break;
+               }
+
+               err = gk20a_vm_map_buffer(
+                       as_share->vm, map_args.dmabuf_fd,
+                       &map_args.offset, map_args.flags,
+                       map_args.kind,
+                       map_args.buffer_offset,
+                       map_args.mapping_size,
+                       &batch);
+               if (err)
+                       break;
+       }
+
+       gk20a_vm_mapping_batch_finish(as_share->vm, &batch);
+
+       if (err)
+               args->num_maps = i;
+       /* note: args->num_unmaps will be unmodified, which is ok
+        * since all unmaps are done */
+
+       return err;
 }
 
 static int gk20a_as_ioctl_get_va_regions(
@@ -360,6 +439,10 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                err = gk20a_as_ioctl_map_buffer_compbits(as_share,
                        (struct nvgpu_as_map_buffer_compbits_args *)buf);
                break;
+       case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
+               err = gk20a_as_ioctl_map_buffer_batch(as_share,
+                       (struct nvgpu_as_map_buffer_batch_args *)buf);
+               break;
        default:
                dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
                err = -ENOTTY;
@@ -952,7 +952,8 @@ __releases(&cde_app->mutex)
                                NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
                                compbits_kind, NULL, true,
                                gk20a_mem_flag_none,
-                               map_offset, map_size);
+                               map_offset, map_size,
+                               NULL);
        if (!map_vaddr) {
                dma_buf_put(compbits_buf);
                err = -EINVAL;
@@ -828,7 +828,8 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
                        0,
                        0,
                        0,
-                       args->mapping_size);
+                       args->mapping_size,
+                       NULL);
        if (err)
                return err;
 
@@ -839,7 +840,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
        virt_addr_hi = u64_hi32(args->offset);
        /* but check anyway */
        if (args->offset + virt_size > SZ_4G) {
-               gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+               gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
                return -EINVAL;
        }
 
@@ -881,7 +882,7 @@ static int gk20a_perfbuf_unmap(struct dbg_session_gk20a *dbg_s,
                        perf_pmasys_mem_block_valid_false_f() |
                        perf_pmasys_mem_block_target_f(0));
 
-       gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset);
+       gk20a_vm_unmap_buffer(&g->mm.pmu.vm, args->offset, NULL);
 
        return 0;
 }
@@ -2033,6 +2033,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
        gpu->cbc_cache_line_size = g->gr.cacheline_size;
        gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
+       gpu->map_buffer_batch_limit = 256;
+
        return 0;
 }
 
@@ -349,14 +349,16 @@ struct gpu_ops {
                                u32 flags,
                                int rw_flag,
                                bool clear_ctags,
-                               bool sparse);
+                               bool sparse,
+                               struct vm_gk20a_mapping_batch *batch);
                void (*gmmu_unmap)(struct vm_gk20a *vm,
                                u64 vaddr,
                                u64 size,
                                int pgsz_idx,
                                bool va_allocated,
                                int rw_flag,
-                               bool sparse);
+                               bool sparse,
+                               struct vm_gk20a_mapping_batch *batch);
                void (*vm_remove)(struct vm_gk20a *vm);
                int (*vm_alloc_share)(struct gk20a_as_share *as_share,
                                u32 flags);
@@ -787,7 +787,34 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref)
 {
        struct mapped_buffer_node *mapped_buffer =
                container_of(ref, struct mapped_buffer_node, ref);
-       gk20a_vm_unmap_locked(mapped_buffer);
+       gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
+}
+
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
+{
+       memset(mapping_batch, 0, sizeof(*mapping_batch));
+       mapping_batch->gpu_l2_flushed = false;
+       mapping_batch->need_tlb_invalidate = false;
+}
+
+void gk20a_vm_mapping_batch_finish_locked(
+       struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch)
+{
+       /* hanging kref_put batch pointer? */
+       WARN_ON(vm->kref_put_batch == mapping_batch);
+
+       if (mapping_batch->need_tlb_invalidate) {
+               struct gk20a *g = gk20a_from_vm(vm);
+               g->ops.mm.tlb_invalidate(vm);
+       }
+}
+
+void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm,
+                                  struct vm_gk20a_mapping_batch *mapping_batch)
+{
+       mutex_lock(&vm->update_gmmu_lock);
+       gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch);
+       mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void gk20a_vm_put_buffers(struct vm_gk20a *vm,
@@ -795,19 +822,25 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
                          int num_buffers)
 {
        int i;
+       struct vm_gk20a_mapping_batch batch;
 
        mutex_lock(&vm->update_gmmu_lock);
+       gk20a_vm_mapping_batch_start(&batch);
+       vm->kref_put_batch = &batch;
+
        for (i = 0; i < num_buffers; ++i)
                kref_put(&mapped_buffers[i]->ref,
                         gk20a_vm_unmap_locked_kref);
+
+       vm->kref_put_batch = NULL;
+       gk20a_vm_mapping_batch_finish_locked(vm, &batch);
        mutex_unlock(&vm->update_gmmu_lock);
 
        nvgpu_free(mapped_buffers);
 }
 
-static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
+static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
+                               struct vm_gk20a_mapping_batch *batch)
 {
        struct device *d = dev_from_vm(vm);
        int retries = 10000; /* 50 ms */
@@ -840,7 +873,10 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
        mapped_buffer->user_mapped--;
        if (mapped_buffer->user_mapped == 0)
                vm->num_user_mapped_buffers--;
+
+       vm->kref_put_batch = batch;
        kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+       vm->kref_put_batch = NULL;
 
        mutex_unlock(&vm->update_gmmu_lock);
 }
@@ -1131,7 +1167,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                        u32 flags,
                        int rw_flag,
                        bool clear_ctags,
-                       bool sparse)
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        int err = 0;
        bool allocated = false;
@@ -1177,7 +1214,10 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                goto fail_validate;
        }
 
-       g->ops.mm.tlb_invalidate(vm);
+       if (!batch)
+               g->ops.mm.tlb_invalidate(vm);
+       else
+               batch->need_tlb_invalidate = true;
 
        return map_offset;
 fail_validate:
@@ -1194,7 +1234,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
                        int pgsz_idx,
                        bool va_allocated,
                        int rw_flag,
-                       bool sparse)
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        int err = 0;
        struct gk20a *g = gk20a_from_vm(vm);
@@ -1230,9 +1271,16 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
         * for gmmu ptes. note the positioning of this relative to any smmu
         * unmapping (below). */
 
-       gk20a_mm_l2_flush(g, true);
-       g->ops.mm.tlb_invalidate(vm);
+       if (!batch) {
+               gk20a_mm_l2_flush(g, true);
+               g->ops.mm.tlb_invalidate(vm);
+       } else {
+               if (!batch->gpu_l2_flushed) {
+                       gk20a_mm_l2_flush(g, true);
+                       batch->gpu_l2_flushed = true;
+               }
+               batch->need_tlb_invalidate = true;
+       }
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1308,7 +1356,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                        bool user_mapped,
                        int rw_flag,
                        u64 buffer_offset,
-                       u64 mapping_size)
+                       u64 mapping_size,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
@@ -1509,7 +1558,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                                        bfr.ctag_offset,
                                        flags, rw_flag,
                                        clear_ctags,
-                                       false);
+                                       false,
+                                       batch);
        if (!map_offset)
                goto clean_up;
 
@@ -1727,8 +1777,9 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
                        0, /* ctag_offset */
                        NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
                        gk20a_mem_flag_read_only,
-                       false,
-                       false);
+                       false, /* clear_ctags */
+                       false, /* sparse */
+                       NULL); /* mapping_batch handle */
 
        if (!mapped_buffer->ctag_map_win_addr) {
                mutex_unlock(&vm->update_gmmu_lock);
@@ -1764,7 +1815,10 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
                        0, /* page size index = 0 i.e. SZ_4K */
                        0, /* kind */
                        0, /* ctag_offset */
-                       flags, rw_flag, false, false);
+                       flags, rw_flag,
+                       false, /* clear_ctags */
+                       false, /* sparse */
+                       NULL); /* mapping_batch handle */
        mutex_unlock(&vm->update_gmmu_lock);
        if (!vaddr) {
                gk20a_err(dev_from_vm(vm), "failed to allocate va space");
@@ -1930,7 +1984,8 @@ void gk20a_gmmu_unmap(struct vm_gk20a *vm,
                        0, /* page size 4K */
                        true, /*va_allocated */
                        rw_flag,
-                       false);
+                       false,
+                       NULL);
        mutex_unlock(&vm->update_gmmu_lock);
 }
 
@@ -2378,7 +2433,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 }
 
 /* NOTE! mapped_buffers lock must be held */
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+                          struct vm_gk20a_mapping_batch *batch)
 {
        struct vm_gk20a *vm = mapped_buffer->vm;
        struct gk20a *g = vm->mm->g;
@@ -2392,7 +2448,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                        0, /* page size 4k */
                        true, /* va allocated */
                        gk20a_mem_flag_none,
-                       false); /* not sparse */
+                       false, /* not sparse */
+                       batch); /* batch handle */
        }
 
        g->ops.mm.gmmu_unmap(vm,
@@ -2402,7 +2459,8 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                        mapped_buffer->va_allocated,
                        gk20a_mem_flag_none,
                        mapped_buffer->va_node ?
-                         mapped_buffer->va_node->sparse : false);
+                         mapped_buffer->va_node->sparse : false,
+                       batch);
 
        gk20a_dbg(gpu_dbg_map, "as=%d pgsz=%d gv=0x%x,%08x own_mem_ref=%d",
                  vm_aspace_id(vm),
@@ -2479,7 +2537,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
-               gk20a_vm_unmap_locked(mapped_buffer);
+               gk20a_vm_unmap_locked(mapped_buffer, NULL);
                node = rb_first(&vm->mapped_buffers);
        }
 
@@ -2776,7 +2834,8 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
                                args->flags,
                                gk20a_mem_flag_none,
                                false,
-                               true);
+                               true,
+                               NULL);
                if (!map_offset) {
                        mutex_unlock(&vm->update_gmmu_lock);
                        gk20a_bfree(vma, vaddr_start);
@@ -2841,7 +2900,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                                va_node->pgsz_idx,
                                true,
                                gk20a_mem_flag_none,
-                               true);
+                               true,
+                               NULL);
                kfree(va_node);
        }
        mutex_unlock(&vm->update_gmmu_lock);
@@ -2960,7 +3020,8 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
                        u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
                        int kind,
                        u64 buffer_offset,
-                       u64 mapping_size)
+                       u64 mapping_size,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        int err = 0;
        struct dma_buf *dmabuf;
@@ -2986,7 +3047,8 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
                        flags, kind, NULL, true,
                        gk20a_mem_flag_none,
                        buffer_offset,
-                       mapping_size);
+                       mapping_size,
+                       batch);
 
        *offset_align = ret_va;
        if (!ret_va) {
@@ -2997,11 +3059,12 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm,
        return err;
 }
 
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset)
+int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+                         struct vm_gk20a_mapping_batch *batch)
 {
        gk20a_dbg_fn("");
 
-       gk20a_vm_unmap_user(vm, offset);
+       gk20a_vm_unmap_user(vm, offset, batch);
        return 0;
 }
 
@@ -225,6 +225,13 @@ struct gk20a_mmu_level {
        size_t entry_size;
 };
 
+/* map/unmap batch state */
+struct vm_gk20a_mapping_batch
+{
+       bool gpu_l2_flushed;
+       bool need_tlb_invalidate;
+};
+
 struct vm_gk20a {
        struct mm_gk20a *mm;
        struct gk20a_as_share *as_share; /* as_share this represents */
@@ -257,6 +264,10 @@ struct vm_gk20a {
        u64 handle;
 #endif
        u32 gmmu_page_sizes[gmmu_nr_page_sizes];
+
+       /* if non-NULL, kref_put will use this batch when
+          unmapping. Must hold vm->update_gmmu_lock. */
+       struct vm_gk20a_mapping_batch *kref_put_batch;
 };
 
 struct gk20a;
@@ -486,7 +497,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
                        u32 flags,
                        int rw_flag,
                        bool clear_ctags,
-                       bool sparse);
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch);
 
 void gk20a_gmmu_unmap(struct vm_gk20a *vm,
                u64 vaddr,
@@ -499,7 +511,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
                        int pgsz_idx,
                        bool va_allocated,
                        int rw_flag,
-                       bool sparse);
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch);
 
 struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
 void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
@@ -514,7 +527,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                bool user_mapped,
                int rw_flag,
                u64 buffer_offset,
-               u64 mapping_size);
+               u64 mapping_size,
+               struct vm_gk20a_mapping_batch *mapping_batch);
 
 int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
                        u64 mapping_gva,
@@ -532,7 +546,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
-void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
+void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
+                          struct vm_gk20a_mapping_batch *batch);
 
 /* get reference to all currently mapped buffers */
 int gk20a_vm_get_buffers(struct vm_gk20a *vm,
@@ -576,13 +591,25 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
                        struct nvgpu_as_free_space_args *args);
 int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
                          struct channel_gk20a *ch);
 
+/* batching eliminates redundant cache flushes and invalidates */
+void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
+void gk20a_vm_mapping_batch_finish(
+       struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+/* called when holding vm->update_gmmu_lock */
+void gk20a_vm_mapping_batch_finish_locked(
+       struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
+
+
+/* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
                        int dmabuf_fd,
                        u64 *offset_align,
                        u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
                        int kind,
                        u64 buffer_offset,
-                       u64 mapping_size);
+                       u64 mapping_size,
+                       struct vm_gk20a_mapping_batch *batch);
+
 int gk20a_init_vm(struct mm_gk20a *mm,
                  struct vm_gk20a *vm,
@@ -592,7 +619,10 @@ int gk20a_init_vm(struct mm_gk20a *mm,
                  bool big_pages,
                  char *name);
 void gk20a_deinit_vm(struct vm_gk20a *vm);
-int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset);
+
+/* Note: batch may be NULL if unmap op is not part of a batch */
+int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
+                         struct vm_gk20a_mapping_batch *batch);
 void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
                        struct gk20a_comptags *comptags);
 dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr);
@@ -66,7 +66,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
                        u32 flags,
                        int rw_flag,
                        bool clear_ctags,
-                       bool sparse)
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        int err = 0;
        struct device *d = dev_from_vm(vm);
@@ -130,7 +131,8 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
                        int pgsz_idx,
                        bool va_allocated,
                        int rw_flag,
-                       bool sparse)
+                       bool sparse,
+                       struct vm_gk20a_mapping_batch *batch)
 {
        struct gk20a *g = gk20a_from_vm(vm);
        struct gk20a_platform *platform = gk20a_get_platform(g->dev);
@@ -182,7 +184,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
        while (node) {
                mapped_buffer =
                        container_of(node, struct mapped_buffer_node, node);
-               gk20a_vm_unmap_locked(mapped_buffer);
+               gk20a_vm_unmap_locked(mapped_buffer, NULL);
                node = rb_first(&vm->mapped_buffers);
        }
 
@@ -172,7 +172,10 @@ struct nvgpu_gpu_characteristics {
        __u32 lts_per_ltc;
        __u32 cbc_cache_line_size;
        __u32 cbc_comptags_per_line;
-       __u32 reserved2;
+
+       /* MAP_BUFFER_BATCH: the upper limit for num_unmaps and
+        * num_maps */
+       __u32 map_buffer_batch_limit;
 
        /* Notes:
           - This struct can be safely appended with new fields. However, always
@@ -1031,6 +1034,16 @@ struct nvgpu_as_get_va_regions_args {
        __u32 reserved;
 };
 
+struct nvgpu_as_map_buffer_batch_args {
+       __u64 unmaps; /* ptr to array of nvgpu_unmap_buffer_args */
+       __u64 maps;   /* ptr to array of nvgpu_as_map_buffer_ex_args */
+       __u32 num_unmaps; /* in: number of unmaps
+                          * out: on error, number of successful unmaps */
+       __u32 num_maps;   /* in: number of maps
+                          * out: on error, number of successful maps */
+       __u64 reserved;
+};
+
 #define NVGPU_AS_IOCTL_BIND_CHANNEL \
        _IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args)
 #define NVGPU32_AS_IOCTL_ALLOC_SPACE \
@@ -1051,9 +1064,11 @@ struct nvgpu_as_get_va_regions_args {
        _IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args)
 #define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \
        _IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args)
+#define NVGPU_AS_IOCTL_MAP_BUFFER_BATCH \
+       _IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args)
 
 #define NVGPU_AS_IOCTL_LAST \
-       _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS)
+       _IOC_NR(NVGPU_AS_IOCTL_MAP_BUFFER_BATCH)
 #define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
        sizeof(struct nvgpu_as_map_buffer_ex_args)
 
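For readers skimming the diff, the following is a condensed, hypothetical illustration of the in-kernel protocol introduced above; the wrapper function and its arguments are invented, but the gk20a_vm_* calls and the vm_gk20a_mapping_batch bracketing follow the code added in this commit: the first batched unmap flushes the GPU L2 at most once, and the TLB invalidate is deferred until the batch is finished.

/* Hypothetical in-kernel caller, mirroring gk20a_as_ioctl_map_buffer_batch
 * in simplified form: unmap two GPU VAs and map two dmabufs under one
 * batch, so the L2 flush and TLB invalidate become per-batch work. */
static int remap_two_buffers(struct gk20a_as_share *as_share,
                             int fd_a, int fd_b, u64 va_a, u64 va_b)
{
        struct vm_gk20a_mapping_batch batch;
        u64 new_va_a = 0, new_va_b = 0;  /* 0: let the kernel pick the VA */
        int err;

        gk20a_vm_mapping_batch_start(&batch);

        err = gk20a_vm_unmap_buffer(as_share->vm, va_a, &batch);
        if (!err)
                err = gk20a_vm_unmap_buffer(as_share->vm, va_b, &batch);
        if (!err)
                err = gk20a_vm_map_buffer(as_share->vm, fd_a, &new_va_a,
                                          0, NV_KIND_DEFAULT, 0, 0, &batch);
        if (!err)
                err = gk20a_vm_map_buffer(as_share->vm, fd_b, &new_va_b,
                                          0, NV_KIND_DEFAULT, 0, 0, &batch);

        /* The deferred TLB invalidate is issued here, at most once. */
        gk20a_vm_mapping_batch_finish(as_share->vm, &batch);

        return err;
}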