gpu: nvgpu: add and use sw-only timers

The nvgpu timeout API has an internal override for presilicon mode by default: in pre-si simulation environments the timeouts never trigger. This behaviour is intended in the original use case of the timer unit with hardware polling loops. In pure software logic, though, the timer must trigger after the specified timeout even in pre-si mode, so add a new init function to produce a timer for software logic. Use this new kind of timer in channel and scheduling worker threads. The channel worker currently times out for just the purpose of the channel watchdog timer, which has its own internal timer. Although that's just software, the general expectation is that the watchdog does not trigger in presilicon tests that run slower than usual. The internal watchdog timer thus keeps the non-sw mode. Bug 3521828 Change-Id: I48ae8522c7ce2346a930e766528d8b64195f81d8 Signed-off-by: Konsta Hölttä <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2662541 Reviewed-by: svcacv <svcacv@nvidia.com> Reviewed-by: Sagar Kamble <skamble@nvidia.com> Reviewed-by: Alex Waterman <alexw@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-22 09:12:24 +03:00 · 2022-02-01 15:59:42 +02:00
parent 621417bc73
commit 8736c0d467
7 changed files with 53 additions and 11 deletions
--- a/drivers/gpu/nvgpu/common/fifo/channel_wdt.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel_wdt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2015-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -142,7 +142,7 @@ void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)

 	ch_worker->watchdog_interval = 100U;

-	nvgpu_timeout_init_cpu_timer(worker->g, &ch_worker->timeout,
+	nvgpu_timeout_init_cpu_timer_sw(worker->g, &ch_worker->timeout,
 			ch_worker->watchdog_interval);
 }

@@ -175,7 +175,7 @@ void nvgpu_channel_worker_poll_wakeup_post_process_item(

 	if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
 		nvgpu_channel_poll_wdt(g);
-		nvgpu_timeout_init_cpu_timer(g, &ch_worker->timeout,
+		nvgpu_timeout_init_cpu_timer_sw(g, &ch_worker->timeout,
 				ch_worker->watchdog_interval);
 	}
 }
--- a/drivers/gpu/nvgpu/common/fifo/watchdog.c
+++ b/drivers/gpu/nvgpu/common/fifo/watchdog.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -92,6 +92,10 @@ static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
 {
 	struct gk20a *g = wdt->g;

+	/*
+	 * Note: this is intentionally not the sw kind of timer to avoid false
+	 * triggers in pre-si environments that tend to run slow.
+	 */
 	nvgpu_timeout_init_cpu_timer(g, &wdt->timer, wdt->limit_ms);

 	wdt->ch_state = *state;
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -67,7 +67,7 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)

 	/* 100 ms is a nice arbitrary timeout for default status */
 	nvs_worker->current_timeout = 100;
-	nvgpu_timeout_init_cpu_timer(worker->g, &nvs_worker->timeout,
+	nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout,
 			nvs_worker->current_timeout);
 }

@@ -135,7 +135,7 @@ static void nvgpu_nvs_worker_wakeup_post_process(struct nvgpu_worker *worker)
 				(next_timeout_ns + NSEC_PER_MSEC - 1) / NSEC_PER_MSEC;
 		}

-		nvgpu_timeout_init_cpu_timer(g, &nvs_worker->timeout,
+		nvgpu_timeout_init_cpu_timer_sw(g, &nvs_worker->timeout,
 				nvs_worker->current_timeout);
 	}
 }
--- a/drivers/gpu/nvgpu/include/nvgpu/timers.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/timers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -166,6 +166,24 @@ int nvgpu_timeout_init_flags(struct gk20a *g, struct nvgpu_timeout *timeout,
 void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout,
 		       u32 duration_ms);

+/**
+ * @brief Initialize a pure software timeout.
+ *
+ * Init a cpu clock based timeout with no pre-si override. See
+ * nvgpu_timeout_init_flags() and NVGPU_TIMER_CPU_TIMER for full explanation.
+ *
+ * This builds a timer with both the NVGPU_TIMER_CPU_TIMER and the
+ * NVGPU_TIMER_NO_PRE_SI flags. Hardware polling loops are usually preferred
+ * to be infinite in presilicon simulation mode; that is not wanted for timers
+ * that drive only software logic, which is what this function is for.
+ *
+ * @param g [in]	GPU driver structure.
+ * @param timeout [in]	Timeout object to initialize.
+ * @param duration_ms [in]	Timeout duration in milliseconds.
+ */
+void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
+		       u32 duration_ms);
+
 /**
 * @brief Initialize a timeout.
 *
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -1,7 +1,7 @@
 /*
 * Color decompression engine support
 *
- * Copyright (c) 2014-2021, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2014-2022, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -936,7 +936,7 @@ __acquires(&cde_app->mutex)
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_timeout timeout;

-	nvgpu_timeout_init_cpu_timer(g, &timeout, MAX_CTX_RETRY_TIME);
+	nvgpu_timeout_init_cpu_timer_sw(g, &timeout, MAX_CTX_RETRY_TIME);

 	do {
 		cde_ctx = gk20a_cde_do_get_context(l);
--- a/drivers/gpu/nvgpu/os/linux/timers.c
+++ b/drivers/gpu/nvgpu/os/linux/timers.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2022, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -45,6 +45,16 @@ void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout
 	nvgpu_assert(err == 0);
 }

+void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
+		       u32 duration_ms)
+{
+	int err = nvgpu_timeout_init_flags(g, timeout, duration_ms,
+					   NVGPU_TIMER_CPU_TIMER |
+					   NVGPU_TIMER_NO_PRE_SI); /* no pre-si override */
+
+	nvgpu_assert(err == 0); /* init is expected to return 0 */
+}
+
 void nvgpu_timeout_init_retry(struct gk20a *g, struct nvgpu_timeout *timeout,
 		       u32 duration_count)
 {
--- a/drivers/gpu/nvgpu/os/posix/timers.c
+++ b/drivers/gpu/nvgpu/os/posix/timers.c
@@ -132,6 +132,16 @@ void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout
 	nvgpu_assert(err == 0);
 }

+void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
+		       u32 duration_ms)
+{
+	int err = nvgpu_timeout_init_flags(g, timeout, duration_ms,
+					   NVGPU_TIMER_CPU_TIMER |
+					   NVGPU_TIMER_NO_PRE_SI); /* no pre-si override */
+
+	nvgpu_assert(err == 0); /* init is expected to return 0 */
+}
+
 void nvgpu_timeout_init_retry(struct gk20a *g, struct nvgpu_timeout *timeout,
 		       u32 duration_count)
 {