gpu: nvgpu: require deferred cleanup for aggressive sync destroy

Aggressive sync destroy is used on some platforms where the number of
syncpoints is limited. It can cause sync objects to get allocated and
freed in the submit path and when jobs are cleaned up, so require
deferred cleanup. Allocations do not belong in job tracking on a
deterministic submit path.
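
As a minimal illustrative sketch (standalone C, not the driver code; the
struct and helper names below are made up), the deferred-cleanup decision
this change extends boils down to:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical, simplified model of the decision made in
     * nvgpu_submit_channel_gpfifo(); see the first hunk below. */
    struct submit_state {
            bool deterministic_channel;
            bool has_syncpoints;
            bool wants_sync_fence;
            bool wants_post_fence;
            bool skip_buffer_refcounting;
            uint32_t aggressive_sync_destroy_thresh;
    };

    static bool needs_deferred_cleanup(const struct submit_state *s)
    {
            /* Anything that may allocate or free memory in the job path
             * forces deferred cleanup and breaks full determinism. */
            return !s->deterministic_channel ||
                   !s->has_syncpoints ||
                   (s->wants_sync_fence && s->wants_post_fence) ||
                   !s->skip_buffer_refcounting ||
                   s->aggressive_sync_destroy_thresh != 0U;
    }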

Although this has been technically allowed before, deterministic
channels have likely not been a priority on those old platforms with
aggressive sync destroy set.

Update the virtualized gp10b platform data to match on a gp10b-vgpu
compatible string instead of gk20a-vgpu; gk20a (Tegra T124) hasn't been
supported for a long time. Delete the aggressive sync destroy threshold
field from this platform: it has enough syncpoints that they do not need
to be allocated dynamically, so having this property set for gp10b-vgpu
has likely been a mistake.
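
A minimal sketch, with shortened stand-in names, of how the compatible
string, chip id and platform data tie together in the hunks below (only
"nvidia,tegra186-gp10b-vgpu", TEGRA_186_VGPU and platform_chip_id come
from the actual change):

    #include <linux/mod_devicetable.h>

    /* Stand-ins for the real nvgpu platform data types. */
    enum example_chip_id { EXAMPLE_TEGRA_186_VGPU = 1 };

    struct example_platform {
            enum example_chip_id platform_chip_id;
    };

    static struct example_platform example_vgpu_tegra_platform = {
            .platform_chip_id = EXAMPLE_TEGRA_186_VGPU,
    };

    /* The device tree compatible string selects the platform data
     * through the driver's of_match table. */
    static const struct of_device_id example_of_match[] = {
            { .compatible = "nvidia,tegra186-gp10b-vgpu",
              .data = &example_vgpu_tegra_platform },
            { /* sentinel */ },
    };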

This is not a completely pure cherry-pick: also extend the gpu
characteristics to not advertise full deterministic submit support when
aggressive sync destroy is enabled. Unlike many other flags, this
platform flag cannot be adjusted by the user.
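
For illustration, the added gating can be read as the following
standalone sketch (the helper name is made up; nvgpu_has_syncpoints()
and the threshold field appear in the characteristics hunk below):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the check added to
     * nvgpu_init_gpu_characteristics(): full deterministic submit is
     * advertised only when syncpoints exist and aggressive sync destroy
     * is disabled (threshold of zero). */
    static bool advertise_deterministic_submit_full(bool has_syncpoints,
                    uint32_t aggressive_sync_destroy_thresh)
    {
            return has_syncpoints && aggressive_sync_destroy_thresh == 0U;
    }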

Jira NVGPU-4548

Change-Id: I283f546d48b79ac94b943d88e5dce55710858330
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2322042
(cherry picked from commit b1ba2b997b2174e365bcb0782ef3e67260ff9e57)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328411
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>

Author:    Konsta Hölttä
Date:      2020-04-01 16:01:53 +03:00
Committer: Alex Waterman
Commit:    dd2fb50a1a (parent 2001b8ec97)
5 changed files with 19 additions and 11 deletions

@@ -460,6 +460,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
         *   allocated, not pooled)
         * - dependency on sync framework for post fences
         * - buffer refcounting, which is O(n)
+        * - aggressive sync destroy is enabled (sync objects are not
+        *   held during channel lifetime but refcounted for submits)
         * - channel wdt, which needs periodic async cleanup
         *
         * If none of the conditions are met, then deferred clean-up
@@ -469,7 +471,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
        need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
                        !nvgpu_has_syncpoints(g) ||
                        (flag_sync_fence && flag_fence_get) ||
-                       !skip_buffer_refcounting;
+                       !skip_buffer_refcounting ||
+                       g->aggressive_sync_destroy_thresh != 0U;
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
        need_deferred_cleanup = need_deferred_cleanup || c->wdt.enabled;

@@ -767,18 +767,25 @@ int nvgpu_init_gpu_characteristics(struct gk20a *g)
                        true);
        /*
         * Full deterministic submit means that synchronization (pre and post
         * fences; implies job tracking) can be used. If such submits can be
         * guaranteed as long as the channel is set up correctly by userspace
         * (e.g., watchdog disabled), this bit is set.
         *
         * Sync framework is needed when we don't have syncpoint support
         * because we don't have a means to expose raw gpu semas in a way
-        * similar to raw syncpts. Use of the framework requires heavy stuff
-        * like deferred job cleanup and wrapping syncs in FDs which prevents
-        * deterministic submits. This is supported otherwise, provided that
-        * the user doesn't request anything that depends on deferred cleanup.
+        * similar to raw syncpts. Use of the framework requires unpredictable
+        * actions including deferred job cleanup and wrapping syncs in FDs.
         *
+        * Aggressive sync destroy causes the channel syncpoint to be abruptly
+        * allocated and deleted during submit path and deferred cleanup.
+        *
         * Note that userspace expects this to be set for usermode submits
         * (even if kernel-mode submits aren't enabled where full deterministic
         * features matter).
         */
-       if (nvgpu_has_syncpoints(g)) {
+       if (nvgpu_has_syncpoints(g) &&
+           g->aggressive_sync_destroy_thresh == 0U) {
                nvgpu_set_enabled(g,
                                NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                                true);

@@ -723,7 +723,7 @@ static struct of_device_id tegra_gk20a_of_match[] = {
                .data = &gv11b_vgpu_tegra_platform},
 #endif
 #ifdef CONFIG_NVGPU_GR_VIRTUALIZATION
-       { .compatible = "nvidia,tegra124-gk20a-vgpu",
+       { .compatible = "nvidia,tegra186-gp10b-vgpu",
                .data = &vgpu_tegra_platform },
 #endif
 #if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA)

@@ -61,7 +61,7 @@ enum tegra_chip_id {
        TEGRA_186,
        TEGRA_194,
        TEGRA_194_VGPU,
-       TEGRA_124_VGPU,
+       TEGRA_186_VGPU,
        TEGRA_234,
 };

@@ -67,8 +67,6 @@ struct gk20a_platform vgpu_tegra_platform = {
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
        .has_syncpoints = true,
 #endif
-       .aggressive_sync_destroy_thresh = 64,
        /* power management configuration */
        .can_railgate_init = false,
        .can_elpg_init = false,
@@ -88,7 +86,7 @@ struct gk20a_platform vgpu_tegra_platform = {
        .clk_round_rate = vgpu_plat_clk_round_rate,
        .get_clk_freqs = vgpu_plat_clk_get_freqs,
-       .platform_chip_id = TEGRA_124_VGPU,
+       .platform_chip_id = TEGRA_186_VGPU,
        /* frequency scaling configuration */
        .devfreq_governor = "userspace",