Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: pd_cache enablement for >4k allocations in qnx
Mapping of large buffers to the GMMU ends up needing many pages for the
PTE tables. Allocating these one by one can become a performance
bottleneck, particularly in the virtualized case. This change adds the
following:

- As the TLB invalidation doesn't have access to mem_off, allow
  top-level allocation by alloc_cache_direct().
- Define NVGPU_PD_CACHE_SIZE, the allocation size for a new slab for
  the PD cache, effectively set to 64K bytes.
- Use the PD cache for any allocation < NVGPU_PD_CACHE_SIZE. When
  freeing cached entries, avoid prefetch errors by invalidating the
  entry (memset to 0).
- Fall back to direct allocation of smaller chunks when a contiguous
  allocation fails.
- Unit test changes.

A short illustrative sketch of this allocation policy appears after the
commit metadata below.

Bug 200649243

Change-Id: I0a667af0ba01d9147c703e64fc970880e52a8fbc
Signed-off-by: dt <dt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2404371
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: Alex Waterman
Parent: 94bc3a8135
Commit: a331fd4b3a
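To make the policy above concrete, here is a minimal, self-contained C sketch of the size-based split and the fallback path. It is illustrative only and not the nvgpu implementation: pd_cache_slab_alloc(), pd_alloc_contiguous(), pd_alloc_in_smaller_chunks() and the failure threshold are hypothetical stand-ins; only the 64K NVGPU_PD_CACHE_SIZE value comes from the commit message.

/*
 * Standalone sketch (not nvgpu code) of the allocation policy described in
 * the commit message: requests smaller than NVGPU_PD_CACHE_SIZE come from a
 * 64K slab cache; larger requests are allocated directly, falling back to
 * smaller chunks when a contiguous allocation fails.
 */
#include <stdio.h>
#include <errno.h>

#define NVGPU_PD_CACHE_SIZE (64u * 1024u)	/* 64K slab, per the commit message */

/* Hypothetical stand-ins for the slab-cache and direct DMA allocation paths. */
static int pd_cache_slab_alloc(unsigned int bytes)
{
	printf("slab cache: packed %u bytes into a 64K slab\n", bytes);
	return 0;
}

static int pd_alloc_contiguous(unsigned int bytes)
{
	/* Pretend very large contiguous allocations fail, to exercise the fallback. */
	if (bytes > 1024u * 1024u) {
		return -ENOMEM;
	}
	printf("direct: contiguous allocation of %u bytes\n", bytes);
	return 0;
}

static int pd_alloc_in_smaller_chunks(unsigned int bytes)
{
	printf("fallback: %u bytes built from smaller direct chunks\n", bytes);
	return 0;
}

static int pd_alloc_sketch(unsigned int bytes)
{
	if (bytes < NVGPU_PD_CACHE_SIZE) {
		/* Small PD: serve it from the shared slab cache. */
		return pd_cache_slab_alloc(bytes);
	}

	/* Large PD: bypass the cache and allocate directly. */
	if (pd_alloc_contiguous(bytes) == 0) {
		return 0;
	}

	/* Contiguous allocation failed: fall back to smaller direct chunks. */
	return pd_alloc_in_smaller_chunks(bytes);
}

int main(void)
{
	pd_alloc_sketch(4096u);			/* served by the PD cache */
	pd_alloc_sketch(256u * 1024u);		/* direct contiguous allocation */
	pd_alloc_sketch(8u * 1024u * 1024u);	/* triggers the fallback path */
	return 0;
}

In the actual change this split lives inside the PD allocator; the unit-test updates below exercise it through nvgpu_pd_cache_init() and nvgpu_pd_alloc().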
@@ -308,6 +308,11 @@ static int init_test_env(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "ecc init failed\n");
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "failed to init pd cache");
+	}
+
 	err = g->ops.mm.init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "failed to init gk20a mm");
@@ -174,6 +174,10 @@ int test_fifo_init_support(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.userd.setup_sw = stub_userd_setup_sw;
 #endif
 	g->ops.ecc.ecc_init_support(g);
+
+	/* PD cache must be initialized prior to mm init */
+	err = nvgpu_pd_cache_init(g);
+
 	g->ops.mm.init_mm_support(g);
 
 	nvgpu_device_init(g);
@@ -84,6 +84,12 @@ int test_gr_init_setup(struct unit_module *m, struct gk20a *g, void *args)
 		return -ENOMEM;
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, "PD cache initialization failed\n");
+		return -ENOMEM;
+	}
+
 	return UNIT_SUCCESS;
 
 fail:
@@ -158,6 +158,11 @@ int test_init_mm(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.fb.intr.enable = gv11b_fb_intr_enable;
 	g->ops.fb.ecc.init = NULL;
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	err = nvgpu_init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed err=%d\n",
@@ -204,6 +204,10 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "nvgpu_vm_init failed\n");
 	}
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }
 
@@ -383,6 +383,10 @@ int test_nvgpu_gmmu_init(struct unit_module *m, struct gk20a *g, void *args)
 
 	init_platform(m, g, true);
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "PD cache initialization failed\n");
+	}
+
 	if (init_mm(m, g) != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed\n");
 	}
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -321,7 +321,7 @@ int test_pd_free_empty_pd(struct unit_module *m, struct gk20a *g,
 
 	/* And now direct frees. */
 	memset(&pd, 0U, sizeof(pd));
-	err = nvgpu_pd_alloc(&vm, &pd, PAGE_SIZE);
+	err = nvgpu_pd_alloc(&vm, &pd, NVGPU_PD_CACHE_SIZE);
 	if (err != 0) {
 		unit_return_fail(m, "PD alloc failed");
 	}
@@ -610,7 +610,7 @@ static int do_test_pd_cache_packing_size(struct unit_module *m, struct gk20a *g,
 {
 	int err;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n], pd;
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
@@ -667,7 +667,7 @@ static int do_test_pd_reusability(struct unit_module *m, struct gk20a *g,
 {
	int err = UNIT_SUCCESS;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n];
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
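As context for the two PAGE_SIZE to NVGPU_PD_CACHE_SIZE replacements above: sizing the test arrays by the slab size means the packing and reusability tests now walk a whole 64K slab's worth of PDs instead of a single page's worth. Here is a tiny standalone sketch of that arithmetic, assuming a 4K PAGE_SIZE and an illustrative 4K pd_size (both assumptions, not values taken from the diff):

#include <stdio.h>

#define PAGE_SIZE		(4u * 1024u)	/* assumed 4K page */
#define NVGPU_PD_CACHE_SIZE	(64u * 1024u)	/* 64K slab, per the commit message */

int main(void)
{
	unsigned int pd_size = 4096u;	/* illustrative PD size */

	/* Before the change the tests sized their arrays per page... */
	printf("PDs per page: %u\n", PAGE_SIZE / pd_size);		/* 1 */

	/* ...after it they cover a full slab's worth of PDs. */
	printf("PDs per slab: %u\n", NVGPU_PD_CACHE_SIZE / pd_size);	/* 16 */
	return 0;
}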
@@ -126,6 +126,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	u64 low_hole, aperture_size;
 	struct nvgpu_os_posix *p = nvgpu_os_posix_from_gk20a(g);
 	struct mm_gk20a *mm = &g->mm;
+	int err;
 
 	p->mm_is_iommuable = true;
 
@@ -193,6 +194,11 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "'bar2' nvgpu_vm_init failed\n");
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "PD cache init failed\n");
+	}
+
 	/*
 	 * This initialization will make sure that correct aperture mask
 	 * is returned */
@@ -229,6 +229,12 @@ static int init_pmu_falcon_test_env(struct unit_module *m, struct gk20a *g)
 		return -ENOMEM;
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, " PD cache allocation failed!\n");
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -89,6 +89,11 @@ int test_rc_init(struct unit_module *m, struct gk20a *g, void *args)
 		unit_return_fail(m, "fifo reg_space failure");
 	}
 
+	ret = nvgpu_pd_cache_init(g);
+	if (ret != 0) {
+		unit_return_fail(m, "PD cache initialization failure");
+	}
+
 	nvgpu_device_init(g);
 
 	g->ops.gr.init.get_no_of_sm = stub_gv11b_gr_init_get_no_of_sm;
@@ -98,6 +98,10 @@ static int init_channel_vm(struct unit_module *m, struct nvgpu_channel *ch)
 
 	ch->vm = mm->pmu.vm;
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }
 