gpu: nvgpu: require deferred cleanup for aggressive sync destroy

Aggressive sync destroy is used on some platforms where the number of
syncpoints is limited. It can cause sync objects to get allocated and
freed in the submit path and when jobs are cleaned up, so require
deferred cleanup. Allocations do not belong in job tracking on a
deterministic submit path.
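
As a minimal illustrative sketch (standalone C, not the driver code; the
struct and helper names below are made up), the deferred-cleanup decision
this change extends boils down to:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical, simplified model of the decision made in
     * nvgpu_submit_channel_gpfifo(); see the first hunk below. */
    struct submit_state {
            bool deterministic_channel;
            bool has_syncpoints;
            bool wants_sync_fence;
            bool wants_post_fence;
            bool skip_buffer_refcounting;
            uint32_t aggressive_sync_destroy_thresh;
    };

    static bool needs_deferred_cleanup(const struct submit_state *s)
    {
            /* Anything that may allocate or free memory in the job path
             * forces deferred cleanup and breaks full determinism. */
            return !s->deterministic_channel ||
                   !s->has_syncpoints ||
                   (s->wants_sync_fence && s->wants_post_fence) ||
                   !s->skip_buffer_refcounting ||
                   s->aggressive_sync_destroy_thresh != 0U;
    }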

Although this has been technically allowed before, deterministic
channels have likely not been a priority on those old platforms with
aggressive sync destroy set.

Update the virtualized gp10b platform data to match on a gp10b-vgpu
compatible string instead of gk20a-vgpu; gk20a (Tegra T124) hasn't been
supported for a long time. Delete the aggressive sync destroy threshold
field from this platform: it has enough syncpoints that they do not need
to be allocated dynamically, so having this property set for gp10b-vgpu
has likely been a mistake.
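
A minimal sketch, with shortened stand-in names, of how the compatible
string, chip id and platform data tie together in the hunks below (only
"nvidia,tegra186-gp10b-vgpu", TEGRA_186_VGPU and platform_chip_id come
from the actual change):

    #include <linux/mod_devicetable.h>

    /* Stand-ins for the real nvgpu platform data types. */
    enum example_chip_id { EXAMPLE_TEGRA_186_VGPU = 1 };

    struct example_platform {
            enum example_chip_id platform_chip_id;
    };

    static struct example_platform example_vgpu_tegra_platform = {
            .platform_chip_id = EXAMPLE_TEGRA_186_VGPU,
    };

    /* The device tree compatible string selects the platform data
     * through the driver's of_match table. */
    static const struct of_device_id example_of_match[] = {
            { .compatible = "nvidia,tegra186-gp10b-vgpu",
              .data = &example_vgpu_tegra_platform },
            { /* sentinel */ },
    };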

This is not a completely pure cherry-pick: also extend the gpu
characteristics to not advertise full deterministic submit support when
aggressive sync destroy is enabled. Unlike many other flags, this
platform flag cannot be adjusted by the user.
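
For illustration, the added gating can be read as the following
standalone sketch (the helper name is made up; nvgpu_has_syncpoints()
and the threshold field appear in the characteristics hunk below):

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical helper mirroring the check added to
     * nvgpu_init_gpu_characteristics(): full deterministic submit is
     * advertised only when syncpoints exist and aggressive sync destroy
     * is disabled (threshold of zero). */
    static bool advertise_deterministic_submit_full(bool has_syncpoints,
                    uint32_t aggressive_sync_destroy_thresh)
    {
            return has_syncpoints && aggressive_sync_destroy_thresh == 0U;
    }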

Jira NVGPU-4548

Change-Id: I283f546d48b79ac94b943d88e5dce55710858330
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2322042
(cherry picked from commit b1ba2b997b2174e365bcb0782ef3e67260ff9e57)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328411
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>

Author:    Konsta Hölttä
Date:      2020-04-01 16:01:53 +03:00
Committer: Alex Waterman
Commit:    dd2fb50a1a (parent 2001b8ec97)
5 changed files with 19 additions and 11 deletions

@@ -460,6 +460,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
         *   allocated, not pooled)
         * - dependency on sync framework for post fences
         * - buffer refcounting, which is O(n)
+        * - aggressive sync destroy is enabled (sync objects are not
+        *   held during channel lifetime but refcounted for submits)
         * - channel wdt, which needs periodic async cleanup
         *
         * If none of the conditions are met, then deferred clean-up
@@ -469,7 +471,8 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
        need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
                        !nvgpu_has_syncpoints(g) ||
                        (flag_sync_fence && flag_fence_get) ||
-                       !skip_buffer_refcounting;
+                       !skip_buffer_refcounting ||
+                       g->aggressive_sync_destroy_thresh != 0U;
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
        need_deferred_cleanup = need_deferred_cleanup || c->wdt.enabled;

@@ -767,18 +767,25 @@ int nvgpu_init_gpu_characteristics(struct gk20a *g)
                        true);
        /*
         * Full deterministic submit means that synchronization (pre and post
         * fences; implies job tracking) can be used. If such submits can be
         * guaranteed as long as the channel is set up correctly by userspace
         * (e.g., watchdog disabled), this bit is set.
         *
         * Sync framework is needed when we don't have syncpoint support
         * because we don't have a means to expose raw gpu semas in a way
-        * similar to raw syncpts. Use of the framework requires heavy stuff
-        * like deferred job cleanup and wrapping syncs in FDs which prevents
-        * deterministic submits. This is supported otherwise, provided that
-        * the user doesn't request anything that depends on deferred cleanup.
+        * similar to raw syncpts. Use of the framework requires unpredictable
+        * actions including deferred job cleanup and wrapping syncs in FDs.
         *
+        * Aggressive sync destroy causes the channel syncpoint to be abruptly
+        * allocated and deleted during submit path and deferred cleanup.
+        *
         * Note that userspace expects this to be set for usermode submits
         * (even if kernel-mode submits aren't enabled where full deterministic
         * features matter).
         */
-       if (nvgpu_has_syncpoints(g)) {
+       if (nvgpu_has_syncpoints(g) &&
+           g->aggressive_sync_destroy_thresh == 0U) {
                nvgpu_set_enabled(g,
                                NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL,
                                true);

@@ -723,7 +723,7 @@ static struct of_device_id tegra_gk20a_of_match[] = {
                .data = &gv11b_vgpu_tegra_platform},
 #endif
 #ifdef CONFIG_NVGPU_GR_VIRTUALIZATION
-       { .compatible = "nvidia,tegra124-gk20a-vgpu",
+       { .compatible = "nvidia,tegra186-gp10b-vgpu",
                .data = &vgpu_tegra_platform },
 #endif
 #if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA)

@@ -61,7 +61,7 @@ enum tegra_chip_id {
        TEGRA_186,
        TEGRA_194,
        TEGRA_194_VGPU,
-       TEGRA_124_VGPU,
+       TEGRA_186_VGPU,
        TEGRA_234,
 };

@@ -67,8 +67,6 @@ struct gk20a_platform vgpu_tegra_platform = {
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
        .has_syncpoints = true,
 #endif
-       .aggressive_sync_destroy_thresh = 64,
        /* power management configuration */
        .can_railgate_init = false,
        .can_elpg_init = false,
@@ -88,7 +86,7 @@ struct gk20a_platform vgpu_tegra_platform = {
        .clk_round_rate = vgpu_plat_clk_round_rate,
        .get_clk_freqs = vgpu_plat_clk_get_freqs,
-       .platform_chip_id = TEGRA_124_VGPU,
+       .platform_chip_id = TEGRA_186_VGPU,
        /* frequency scaling configuration */
        .devfreq_governor = "userspace",