gpu: nvgpu: pd_cache enablement for >4k allocations in qnx

Mapping large buffers to the GMMU ends up needing many pages for the PTE tables. Allocating these one by one can become a performance bottleneck, particularly in the virtualized case. This change adds the following: - As the TLB invalidation doesn't have access to mem_off, allow top-level allocation by alloc_cache_direct(). - Define NVGPU_PD_CACHE_SIZE, the allocation size for a new slab for the PD cache, effectively set to 64K bytes. - Use the PD cache for any allocation < NVGPU_PD_CACHE_SIZE. - When freeing up cached entries, avoid prefetch errors by invalidating the entry (memset to 0). - Try to fall back to direct allocation of smaller chunks on contiguous allocation failures. - Unit test changes. Bug 200649243 Change-Id: I0a667af0ba01d9147c703e64fc970880e52a8fbc Signed-off-by: dt <dt@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2404371 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 10:34:43 +03:00 · 2020-08-26 16:25:36 -07:00
parent 94bc3a8135
commit a331fd4b3a
16 changed files with 122 additions and 22 deletions
--- a/userspace/units/acr/nvgpu-acr.c
+++ b/userspace/units/acr/nvgpu-acr.c
@@ -308,6 +308,11 @@ static int init_test_env(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "ecc init failed\n");
 	}

+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "failed to init pd cache");
+	}
+
 	err = g->ops.mm.init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "failed to init gk20a mm");
--- a/userspace/units/fifo/nvgpu-fifo-common.c
+++ b/userspace/units/fifo/nvgpu-fifo-common.c
@@ -174,6 +174,10 @@ int test_fifo_init_support(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.userd.setup_sw = stub_userd_setup_sw;
 #endif
 	g->ops.ecc.ecc_init_support(g);
+
+	/* PD cache must be initialized prior to mm init */
+	err = nvgpu_pd_cache_init(g);
+
 	g->ops.mm.init_mm_support(g);

 	nvgpu_device_init(g);
--- a/userspace/units/gr/nvgpu-gr.c
+++ b/userspace/units/gr/nvgpu-gr.c
@@ -84,6 +84,12 @@ int test_gr_init_setup(struct unit_module *m, struct gk20a *g, void *args)
 		return -ENOMEM;
 	}

+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, "PD cache initialization failed\n");
+		return -ENOMEM;
+	}
+
 	return UNIT_SUCCESS;

 fail:
--- a/userspace/units/mm/as/as.c
+++ b/userspace/units/mm/as/as.c
@@ -158,6 +158,11 @@ int test_init_mm(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.fb.intr.enable = gv11b_fb_intr_enable;
 	g->ops.fb.ecc.init = NULL;

+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	err = nvgpu_init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed err=%d\n",
--- a/userspace/units/mm/dma/dma.c
+++ b/userspace/units/mm/dma/dma.c
@@ -204,6 +204,10 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "nvgpu_vm_init failed\n");
 	}

+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }

--- a/userspace/units/mm/gmmu/page_table/page_table.c
+++ b/userspace/units/mm/gmmu/page_table/page_table.c
@@ -383,6 +383,10 @@ int test_nvgpu_gmmu_init(struct unit_module *m, struct gk20a *g, void *args)

 	init_platform(m, g, true);

+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "PD cache initialization failed\n");
+	}
+
 	if (init_mm(m, g) != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed\n");
 	}
--- a/userspace/units/mm/gmmu/pd_cache/pd_cache.c
+++ b/userspace/units/mm/gmmu/pd_cache/pd_cache.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -321,7 +321,7 @@ int test_pd_free_empty_pd(struct unit_module *m, struct gk20a *g,

 	/* And now direct frees. */
 	memset(&pd, 0U, sizeof(pd));
-	err = nvgpu_pd_alloc(&vm, &pd, PAGE_SIZE);
+	err = nvgpu_pd_alloc(&vm, &pd, NVGPU_PD_CACHE_SIZE);
 	if (err != 0) {
 		unit_return_fail(m, "PD alloc failed");
 	}
@@ -610,7 +610,7 @@ static int do_test_pd_cache_packing_size(struct unit_module *m, struct gk20a *g,
 {
 	int err;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n], pd;
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
@@ -667,7 +667,7 @@ static int do_test_pd_reusability(struct unit_module *m, struct gk20a *g,
 {
 	int err = UNIT_SUCCESS;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n];
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
--- a/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c
+++ b/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c
@@ -126,6 +126,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	u64 low_hole, aperture_size;
 	struct nvgpu_os_posix *p = nvgpu_os_posix_from_gk20a(g);
 	struct mm_gk20a *mm = &g->mm;
+	int err;

 	p->mm_is_iommuable = true;

@@ -193,6 +194,11 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "'bar2' nvgpu_vm_init failed\n");
 	}

+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "PD cache init failed\n");
+	}
+
 	/*
 	 * This initialization will make sure that correct aperture mask
 	 * is returned */
--- a/userspace/units/pmu/nvgpu-pmu.c
+++ b/userspace/units/pmu/nvgpu-pmu.c
@@ -229,6 +229,12 @@ static int init_pmu_falcon_test_env(struct unit_module *m, struct gk20a *g)
 		return -ENOMEM;
 	}

+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, " PD cache allocation failed!\n");
+		return -ENOMEM;
+	}
+
 	return 0;
 }

--- a/userspace/units/rc/nvgpu-rc.c
+++ b/userspace/units/rc/nvgpu-rc.c
@@ -89,6 +89,11 @@ int test_rc_init(struct unit_module *m, struct gk20a *g, void *args)
 		unit_return_fail(m, "fifo reg_space failure");
 	}

+	ret = nvgpu_pd_cache_init(g);
+	if (ret != 0) {
+		unit_return_fail(m, "PD cache initialization failure");
+	}
+
 	nvgpu_device_init(g);

 	g->ops.gr.init.get_no_of_sm = stub_gv11b_gr_init_get_no_of_sm;
--- a/userspace/units/sync/nvgpu-sync.c
+++ b/userspace/units/sync/nvgpu-sync.c
@@ -98,6 +98,10 @@ static int init_channel_vm(struct unit_module *m, struct nvgpu_channel *ch)

 	ch->vm = mm->pmu.vm;

+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }