diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 09885128a..cc50ed741 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -460,6 +460,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, * allocated, not pooled) * - dependency on sync framework for post fences * - buffer refcounting, which is O(n) + * - aggressive sync destroy is enabled (sync objects are not + * held during channel lifetime but refcounted for submits) * - channel wdt, which needs periodic async cleanup * * If none of the conditions are met, then deferred clean-up @@ -469,7 +471,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) || !nvgpu_has_syncpoints(g) || (flag_sync_fence && flag_fence_get) || - !skip_buffer_refcounting; + !skip_buffer_refcounting || + g->aggressive_sync_destroy_thresh != 0U; #ifdef CONFIG_NVGPU_CHANNEL_WDT need_deferred_cleanup = need_deferred_cleanup || c->wdt.enabled; diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 62c4c773c..1c6dd731a 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -767,18 +767,25 @@ int nvgpu_init_gpu_characteristics(struct gk20a *g) true); /* + * Full deterministic submit means that synchronization (pre and post + * fences; implies job tracking) can be used. If such submits can be + * guaranteed as long as the channel is set up correctly by userspace + * (e.g., watchdog disabled), this bit is set. + * * Sync framework is needed when we don't have syncpoint support * because we don't have a means to expose raw gpu semas in a way - * similar to raw syncpts. Use of the framework requires heavy stuff - * like deferred job cleanup and wrapping syncs in FDs which prevents - * deterministic submits. 
This is supported otherwise, provided that - * the user doesn't request anything that depends on deferred cleanup. + * similar to raw syncpts. Use of the framework requires unpredictable + * actions including deferred job cleanup and wrapping syncs in FDs. + * + * Aggressive sync destroy causes the channel syncpoint to be abruptly + * allocated and deleted during the submit path and deferred cleanup. * * Note that userspace expects this to be set for usermode submits * (even if kernel-mode submits aren't enabled where full deterministic * features matter). */ - if (nvgpu_has_syncpoints(g)) { + if (nvgpu_has_syncpoints(g) && + g->aggressive_sync_destroy_thresh == 0U) { nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL, true); diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 4b70f0460..3a70684a7 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -723,7 +723,7 @@ static struct of_device_id tegra_gk20a_of_match[] = { .data = &gv11b_vgpu_tegra_platform}, #endif #ifdef CONFIG_NVGPU_GR_VIRTUALIZATION - { .compatible = "nvidia,tegra124-gk20a-vgpu", + { .compatible = "nvidia,tegra186-gp10b-vgpu", .data = &vgpu_tegra_platform }, #endif #if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA) diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h index 62d61e784..cc1532483 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h @@ -61,7 +61,7 @@ enum tegra_chip_id { TEGRA_186, TEGRA_194, TEGRA_194_VGPU, - TEGRA_124_VGPU, + TEGRA_186_VGPU, TEGRA_234, }; diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c index 765f6fefa..6066fef6b 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/platform_vgpu_tegra.c @@ -67,8 +67,6 @@ struct gk20a_platform 
vgpu_tegra_platform = { #ifdef CONFIG_TEGRA_GK20A_NVHOST .has_syncpoints = true, #endif - .aggressive_sync_destroy_thresh = 64, - /* power management configuration */ .can_railgate_init = false, .can_elpg_init = false, @@ -88,7 +86,7 @@ struct gk20a_platform vgpu_tegra_platform = { .clk_round_rate = vgpu_plat_clk_round_rate, .get_clk_freqs = vgpu_plat_clk_get_freqs, - .platform_chip_id = TEGRA_124_VGPU, + .platform_chip_id = TEGRA_186_VGPU, /* frequency scaling configuration */ .devfreq_governor = "userspace",