diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml
index ca63e62db..f493d5753 100644
--- a/arch/nvgpu-common.yaml
+++ b/arch/nvgpu-common.yaml
@@ -330,7 +330,9 @@ fifo:
   channel:
     safe: yes
     sources: [ common/fifo/channel.c,
+               common/fifo/watchdog.c,
                include/nvgpu/channel.h,
+               include/nvgpu/watchdog.h,
                include/nvgpu/gops_channel.h,
                include/nvgpu/gops_ramfc.h,
                include/nvgpu/gops_ramin.h,
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index fffc055c3..97e844089 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -535,6 +535,7 @@ nvgpu-y += \
 	common/fifo/engines.o \
 	common/fifo/pbdma_status.o \
 	common/fifo/userd.o \
+	common/fifo/watchdog.o \
 	common/fence/fence.o \
 	common/ecc.o \
 	common/log_common.o \
diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs
index 1b33e1fc2..882ee8e91 100644
--- a/drivers/gpu/nvgpu/Makefile.shared.configs
+++ b/drivers/gpu/nvgpu/Makefile.shared.configs
@@ -243,6 +243,7 @@ CONFIG_NVGPU_SW_SEMAPHORE := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SW_SEMAPHORE
 
 # Enable Channel WDT for safety build until we switch to user mode submits only
+CONFIG_NVGPU_CHANNEL_WDT := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT
 
 # Enable Kernel Mode submit for safety build until we switch to user mode
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 3363110f5..07780ce45 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -396,6 +396,10 @@ srcs += common/fifo/submit.c \
 	common/sync/channel_sync_syncpt.c
 endif
 
+ifeq ($(CONFIG_NVGPU_CHANNEL_WDT),1)
+srcs += common/fifo/watchdog.c
+endif
+
 ifeq ($(CONFIG_NVGPU_SW_SEMAPHORE),1)
 srcs += common/semaphore/semaphore_sea.c \
 	common/semaphore/semaphore_pool.c \
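Reviewer note (illustration only, not part of the change): with common/fifo/watchdog.c now built only when CONFIG_NVGPU_CHANNEL_WDT is set, every caller of the wdt API must still compile with the flag off. This diff keeps the existing #ifdef guards at the call sites; an alternative sketch, assuming one were free to change nvgpu/watchdog.h, is to give the header no-op inline stubs so call sites need no guards:

    /* Hypothetical sketch only; nvgpu/watchdog.h in this diff declares
     * the functions unconditionally and relies on call-site #ifdefs. */
    #ifdef CONFIG_NVGPU_CHANNEL_WDT
    void nvgpu_channel_wdt_start(struct nvgpu_channel *ch);
    bool nvgpu_channel_wdt_stop(struct nvgpu_channel *ch);
    #else
    static inline void nvgpu_channel_wdt_start(struct nvgpu_channel *ch)
    {
            (void)ch;       /* watchdog compiled out; nothing to arm */
    }
    static inline bool nvgpu_channel_wdt_stop(struct nvgpu_channel *ch)
    {
            (void)ch;
            return false;   /* a compiled-out watchdog is never running */
    }
    #endif

Either approach is valid; stubs trade a little header bulk for cleaner call sites.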
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index a73558b86..9e3a6a9c7 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -54,6 +54,7 @@
 #include
 #include
 #include
+#include <nvgpu/watchdog.h>
 #include
 #include
 #include
@@ -454,257 +455,6 @@ u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch)
 	return nvgpu_channel_get_gpfifo_free_count(ch);
 }
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
-
-static void nvgpu_channel_wdt_init(struct nvgpu_channel *ch)
-{
-	struct gk20a *g = ch->g;
-	int ret;
-
-	if (nvgpu_channel_check_unserviceable(ch)) {
-		ch->wdt.running = false;
-		return;
-	}
-
-	ret = nvgpu_timeout_init(g, &ch->wdt.timer,
-			ch->wdt.limit_ms,
-			NVGPU_TIMER_CPU_TIMER);
-	if (ret != 0) {
-		nvgpu_err(g, "timeout_init failed: %d", ret);
-		return;
-	}
-
-	ch->wdt.gp_get = g->ops.userd.gp_get(g, ch);
-	ch->wdt.pb_get = g->ops.userd.pb_get(g, ch);
-	ch->wdt.running = true;
-}
-
-/**
- * Start a timeout counter (watchdog) on this channel.
- *
- * Trigger a watchdog to recover the channel after the per-platform timeout
- * duration (but strictly no earlier) if the channel hasn't advanced within
- * that time.
- *
- * If the timeout is already running, do nothing. This should be called when
- * new jobs are submitted. The timeout will stop when the last tracked job
- * finishes, making the channel idle.
- *
- * The channel's gpfifo read pointer will be used to determine whether the
- * job is actually stuck at that time. After the timeout duration has
- * expired, a worker thread will consider the channel stuck and recover it.
- */
-static void nvgpu_channel_wdt_start(struct nvgpu_channel *ch)
-{
-	if (!nvgpu_is_timeouts_enabled(ch->g)) {
-		return;
-	}
-
-	if (!ch->wdt.enabled) {
-		return;
-	}
-
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-
-	if (ch->wdt.running) {
-		nvgpu_spinlock_release(&ch->wdt.lock);
-		return;
-	}
-	nvgpu_channel_wdt_init(ch);
-	nvgpu_spinlock_release(&ch->wdt.lock);
-}
-
-/**
- * Stop a running timeout counter (watchdog) on this channel.
- *
- * Make the watchdog consider the channel not running, so that it won't get
- * recovered even if no progress is detected. Progress is not tracked if the
- * watchdog is turned off.
- *
- * No guarantees are made about concurrent execution of the timeout handler.
- * (This should be called from an update handler running in the same thread
- * as the watchdog.)
- */
-static bool nvgpu_channel_wdt_stop(struct nvgpu_channel *ch)
-{
-	bool was_running;
-
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-	was_running = ch->wdt.running;
-	ch->wdt.running = false;
-	nvgpu_spinlock_release(&ch->wdt.lock);
-	return was_running;
-}
-
-/**
- * Continue a previously stopped timeout.
- *
- * Enable the timeout again but don't reinitialize its timer.
- *
- * No guarantees are made about concurrent execution of the timeout handler.
- * (This should be called from an update handler running in the same thread
- * as the watchdog.)
- */
-static void nvgpu_channel_wdt_continue(struct nvgpu_channel *ch)
-{
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-	ch->wdt.running = true;
-	nvgpu_spinlock_release(&ch->wdt.lock);
-}
-
-/**
- * Reset the counter of a timeout that is in effect.
- *
- * If this channel has an active timeout, act as if something happened on the
- * channel right now.
- *
- * Rewinding a stopped counter is irrelevant; this is a no-op for non-running
- * timeouts. Stopped timeouts can only be started (which is technically a
- * rewind too) or continued (where the stop is actually a pause).
- */
-static void nvgpu_channel_wdt_rewind(struct nvgpu_channel *ch)
-{
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-	if (ch->wdt.running) {
-		nvgpu_channel_wdt_init(ch);
-	}
-	nvgpu_spinlock_release(&ch->wdt.lock);
-}
-
-/**
- * Rewind the timeout on each non-dormant channel.
- *
- * Reschedule the timeout of each active channel for which timeouts are running
- * as if something had happened on each channel right now. This should be
- * called when a global hang is detected that could cause a false positive on
- * other innocent channels.
- */
-void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g)
-{
-	struct nvgpu_fifo *f = &g->fifo;
-	u32 chid;
-
-	for (chid = 0; chid < f->num_channels; chid++) {
-		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
-
-		if (ch != NULL) {
-			if (!nvgpu_channel_check_unserviceable(ch)) {
-				nvgpu_channel_wdt_rewind(ch);
-			}
-			nvgpu_channel_put(ch);
-		}
-	}
-}
-
-/**
- * Check if a timed out channel has hung and recover it if it has.
- *
- * Test if this channel has really got stuck at this point by checking if its
- * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since
- * the watchdog was started and it has timed out, force-reset the channel.
- *
- * The GPU is implicitly on at this point, because the watchdog can only run
- * on channels that have submitted jobs pending for cleanup.
- */
-static void nvgpu_channel_wdt_handler(struct nvgpu_channel *ch)
-{
-	struct gk20a *g = ch->g;
-	u32 gp_get;
-	u32 new_gp_get;
-	u64 pb_get;
-	u64 new_pb_get;
-
-	nvgpu_log_fn(g, " ");
-
-	if (nvgpu_channel_check_unserviceable(ch)) {
-		/* channel is already recovered */
-		if (nvgpu_channel_wdt_stop(ch) == true) {
-			nvgpu_info(g, "chid: %d unserviceable but wdt was ON",
-				ch->chid);
-		}
-		return;
-	}
-
-	/* Get status but keep timer running */
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-	gp_get = ch->wdt.gp_get;
-	pb_get = ch->wdt.pb_get;
-	nvgpu_spinlock_release(&ch->wdt.lock);
-
-	new_gp_get = g->ops.userd.gp_get(g, ch);
-	new_pb_get = g->ops.userd.pb_get(g, ch);
-
-	if (new_gp_get != gp_get || new_pb_get != pb_get) {
-		/* Channel has advanced, timer keeps going but resets */
-		nvgpu_channel_wdt_rewind(ch);
-	} else if (!nvgpu_timeout_peek_expired(&ch->wdt.timer)) {
-		/* Seems stuck but waiting to time out */
-	} else {
-		nvgpu_err(g, "Job on channel %d timed out",
-			ch->chid);
-
-		/* force reset calls gk20a_debug_dump but not this */
-		if (ch->wdt.debug_dump) {
-			gk20a_gr_debug_dump(g);
-		}
-
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
-		if (g->ops.tsg.force_reset(ch,
-			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
-			ch->wdt.debug_dump) != 0) {
-			nvgpu_err(g, "failed tsg force reset for chid: %d",
-				ch->chid);
-		}
-#endif
-	}
-}
-
-/**
- * Test if the per-channel watchdog is on; check the timeout in that case.
- *
- * Each channel has an expiration-time-based watchdog. The timer is
- * (re)initialized in two situations: when a new job is submitted on an idle
- * channel and when the timeout is checked but progress is detected. The
- * watchdog timeout limit is a coarse sliding window.
- *
- * The timeout is stopped (disabled) after the last job in a row finishes
- * and marks the channel idle.
- */
-static void nvgpu_channel_wdt_check(struct nvgpu_channel *ch)
-{
-	bool running;
-
-	nvgpu_spinlock_acquire(&ch->wdt.lock);
-	running = ch->wdt.running;
-	nvgpu_spinlock_release(&ch->wdt.lock);
-
-	if (running) {
-		nvgpu_channel_wdt_handler(ch);
-	}
-}
-
-/**
- * Loop over every living channel, check timeouts and handle stuck channels.
- */
-static void nvgpu_channel_poll_wdt(struct gk20a *g)
-{
-	unsigned int chid;
-
-
-	for (chid = 0; chid < g->fifo.num_channels; chid++) {
-		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
-
-		if (ch != NULL) {
-			if (!nvgpu_channel_check_unserviceable(ch)) {
-				nvgpu_channel_wdt_check(ch);
-			}
-			nvgpu_channel_put(ch);
-		}
-	}
-}
-
-#endif /* CONFIG_NVGPU_CHANNEL_WDT */
-
 static inline struct nvgpu_channel_worker *
 nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
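Reviewer note (illustration only, not part of the change): the functions removed above move essentially verbatim to common/fifo/watchdog.c below. Their doc comments define a lifecycle: start on submit, stop when the last tracked job completes, rewind on observed progress. A minimal sketch of the intended calling pattern from a kernel-mode submit/update path (the functions wrapping the wdt calls are hypothetical):

    /* On job submission (hypothetical submit path): arm the watchdog.
     * nvgpu_channel_wdt_start() is a no-op if it is already running. */
    static void example_submit(struct nvgpu_channel *ch)
    {
            /* ... push the job to the gpfifo ... */
            nvgpu_channel_wdt_start(ch);
    }

    /* On job completion (hypothetical update handler, same thread as the
     * watchdog per the doc comments): pause, clean up finished jobs, then
     * resume only if work remains in flight. */
    static void example_update(struct nvgpu_channel *ch, bool jobs_remain)
    {
            bool was_running = nvgpu_channel_wdt_stop(ch);

            /* ... clean up finished jobs ... */

            if (jobs_remain && was_running) {
                    nvgpu_channel_wdt_continue(ch);
            }
    }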
@@ -713,7 +463,6 @@ nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
 };
 
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
-
 static void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
 {
 	struct nvgpu_channel_worker *ch_worker =
@@ -729,6 +478,25 @@ static void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
 	}
 }
 
+/**
+ * Loop over every living channel, check timeouts and handle stuck channels.
+ */
+static void nvgpu_channel_poll_wdt(struct gk20a *g)
+{
+	unsigned int chid;
+
+	for (chid = 0; chid < g->fifo.num_channels; chid++) {
+		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
+
+		if (ch != NULL) {
+			if (!nvgpu_channel_check_unserviceable(ch)) {
+				nvgpu_channel_wdt_check(ch);
+			}
+			nvgpu_channel_put(ch);
+		}
+	}
+}
+
 static void nvgpu_channel_worker_poll_wakeup_post_process_item(
 		struct nvgpu_worker *worker)
 {
@@ -757,9 +525,15 @@ static u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
 	return ch_worker->watchdog_interval;
 }
 
-
 #endif /* CONFIG_NVGPU_CHANNEL_WDT */
 
+static inline struct nvgpu_channel *
+nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct nvgpu_channel *)
+		((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
+};
+
 static void nvgpu_channel_worker_poll_wakeup_process_item(
 		struct nvgpu_list_node *work_item)
 {
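Aside (illustration only, not part of the change): nvgpu_channel_from_worker_item(), moved above from channel.h, is the classic container-of idiom: worker_item is an nvgpu_list_node embedded in struct nvgpu_channel, and subtracting the member's offset from the node pointer recovers the containing channel. A self-contained sketch of the same technique with hypothetical types:

    #include <stddef.h>     /* offsetof */
    #include <stdint.h>     /* uintptr_t */

    struct list_node {
            struct list_node *next, *prev;
    };

    struct widget {
            int id;
            struct list_node link;  /* embedded node, like worker_item */
    };

    /* Recover the widget that embeds the given node. */
    static inline struct widget *widget_from_link(struct list_node *node)
    {
            return (struct widget *)
                    ((uintptr_t)node - offsetof(struct widget, link));
    }

The cast through uintptr_t keeps the pointer arithmetic in integer space, the same style the function above uses.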
diff --git a/drivers/gpu/nvgpu/common/fifo/watchdog.c b/drivers/gpu/nvgpu/common/fifo/watchdog.c
new file mode 100644
index 000000000..bb59218dc
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/watchdog.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+
+static void nvgpu_channel_wdt_init(struct nvgpu_channel *ch)
+{
+	struct gk20a *g = ch->g;
+	int ret;
+
+	if (nvgpu_channel_check_unserviceable(ch)) {
+		ch->wdt.running = false;
+		return;
+	}
+
+	ret = nvgpu_timeout_init(g, &ch->wdt.timer,
+			ch->wdt.limit_ms,
+			NVGPU_TIMER_CPU_TIMER);
+	if (ret != 0) {
+		nvgpu_err(g, "timeout_init failed: %d", ret);
+		return;
+	}
+
+	ch->wdt.gp_get = g->ops.userd.gp_get(g, ch);
+	ch->wdt.pb_get = g->ops.userd.pb_get(g, ch);
+	ch->wdt.running = true;
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine whether the
+ * job is actually stuck at that time. After the timeout duration has
+ * expired, a worker thread will consider the channel stuck and recover it.
+ */
+void nvgpu_channel_wdt_start(struct nvgpu_channel *ch)
+{
+	if (!nvgpu_is_timeouts_enabled(ch->g)) {
+		return;
+	}
+
+	if (!ch->wdt.enabled) {
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+
+	if (ch->wdt.running) {
+		nvgpu_spinlock_release(&ch->wdt.lock);
+		return;
+	}
+	nvgpu_channel_wdt_init(ch);
+	nvgpu_spinlock_release(&ch->wdt.lock);
+}
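Reviewer note (illustration only, not part of the change): the watchdog timer above is the generic nvgpu CPU-time timeout object. A minimal sketch of that pattern in isolation, using only the calls visible in this diff (the surrounding variables and polling context are hypothetical):

    /* Sketch: arm a CPU-time based timeout and poll it without
     * consuming or re-arming it, the way the wdt handler does. */
    struct nvgpu_timeout timer;
    int err = nvgpu_timeout_init(g, &timer, limit_ms, NVGPU_TIMER_CPU_TIMER);

    if (err == 0) {
            /* ... later, from a polling thread ... */
            if (nvgpu_timeout_peek_expired(&timer)) {
                    /* limit_ms elapsed without a rewind */
            }
    }

nvgpu_timeout_peek_expired() only inspects the deadline; the watchdog re-arms by calling nvgpu_timeout_init() again via nvgpu_channel_wdt_init() from the rewind path.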
+
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+bool nvgpu_channel_wdt_stop(struct nvgpu_channel *ch)
+{
+	bool was_running;
+
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	was_running = ch->wdt.running;
+	ch->wdt.running = false;
+	nvgpu_spinlock_release(&ch->wdt.lock);
+	return was_running;
+}
+
+/**
+ * Continue a previously stopped timeout.
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+void nvgpu_channel_wdt_continue(struct nvgpu_channel *ch)
+{
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	ch->wdt.running = true;
+	nvgpu_spinlock_release(&ch->wdt.lock);
+}
+
+/**
+ * Reset the counter of a timeout that is in effect.
+ *
+ * If this channel has an active timeout, act as if something happened on the
+ * channel right now.
+ *
+ * Rewinding a stopped counter is irrelevant; this is a no-op for non-running
+ * timeouts. Stopped timeouts can only be started (which is technically a
+ * rewind too) or continued (where the stop is actually a pause).
+ */
+static void nvgpu_channel_wdt_rewind(struct nvgpu_channel *ch)
+{
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	if (ch->wdt.running) {
+		nvgpu_channel_wdt_init(ch);
+	}
+	nvgpu_spinlock_release(&ch->wdt.lock);
+}
+
+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are running
+ * as if something had happened on each channel right now. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
+void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g)
+{
+	struct nvgpu_fifo *f = &g->fifo;
+	u32 chid;
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
+
+		if (ch != NULL) {
+			if (!nvgpu_channel_check_unserviceable(ch)) {
+				nvgpu_channel_wdt_rewind(ch);
+			}
+			nvgpu_channel_put(ch);
+		}
+	}
+}
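Aside (illustration only, not part of the change): because nvgpu_channel_wdt_rewind() restarts the full window whenever progress is observed, the watchdog bounds time-since-last-progress, not per-job latency. A worked timeline, assuming a hypothetical limit_ms of 5000:

    /*
     * t =    0 ms  job submitted  -> wdt_start(): snapshot gp/pb_get,
     *                                window expires at t = 5000
     * t = 3000 ms  worker poll    -> gp_get advanced: wdt_rewind(),
     *                                new window expires at t = 8000
     * t = 8000 ms  worker poll    -> no progress since t = 3000 and the
     *                                timer has expired: force-reset path
     */

So a stuck channel is recovered no earlier than limit_ms after its last observed progress, and no later than roughly limit_ms plus one worker polling interval.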
+
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test if this channel has really got stuck at this point by checking if its
+ * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since
+ * the watchdog was started and it has timed out, force-reset the channel.
+ *
+ * The GPU is implicitly on at this point, because the watchdog can only run
+ * on channels that have submitted jobs pending for cleanup.
+ */
+static void nvgpu_channel_wdt_handler(struct nvgpu_channel *ch)
+{
+	struct gk20a *g = ch->g;
+	u32 gp_get;
+	u32 new_gp_get;
+	u64 pb_get;
+	u64 new_pb_get;
+
+	nvgpu_log_fn(g, " ");
+
+	if (nvgpu_channel_check_unserviceable(ch)) {
+		/* channel is already recovered */
+		if (nvgpu_channel_wdt_stop(ch) == true) {
+			nvgpu_info(g, "chid: %d unserviceable but wdt was ON",
+				ch->chid);
+		}
+		return;
+	}
+
+	/* Get status but keep timer running */
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	gp_get = ch->wdt.gp_get;
+	pb_get = ch->wdt.pb_get;
+	nvgpu_spinlock_release(&ch->wdt.lock);
+
+	new_gp_get = g->ops.userd.gp_get(g, ch);
+	new_pb_get = g->ops.userd.pb_get(g, ch);
+
+	if (new_gp_get != gp_get || new_pb_get != pb_get) {
+		/* Channel has advanced, timer keeps going but resets */
+		nvgpu_channel_wdt_rewind(ch);
+	} else if (!nvgpu_timeout_peek_expired(&ch->wdt.timer)) {
+		/* Seems stuck but waiting to time out */
+	} else {
+		nvgpu_err(g, "Job on channel %d timed out",
+			ch->chid);
+
+		/* force reset calls gk20a_debug_dump but not this */
+		if (ch->wdt.debug_dump) {
+			gk20a_gr_debug_dump(g);
+		}
+
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
+		if (g->ops.tsg.force_reset(ch,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
+			ch->wdt.debug_dump) != 0) {
+			nvgpu_err(g, "failed tsg force reset for chid: %d",
+				ch->chid);
+		}
+#endif
+	}
+}
+
+/**
+ * Test if the per-channel watchdog is on; check the timeout in that case.
+ *
+ * Each channel has an expiration-time-based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected. The
+ * watchdog timeout limit is a coarse sliding window.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes
+ * and marks the channel idle.
+ */
+void nvgpu_channel_wdt_check(struct nvgpu_channel *ch)
+{
+	bool running;
+
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	running = ch->wdt.running;
+	nvgpu_spinlock_release(&ch->wdt.lock);
+
+	if (running) {
+		nvgpu_channel_wdt_handler(ch);
+	}
+}
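Reviewer note (illustration only, not part of the change): the handler compares both gp_get (the gpfifo read pointer) and pb_get (the pushbuffer fetch pointer) against the snapshots taken when the timer was armed. Checking pb_get as well matters because a single gpfifo entry can describe a long pushbuffer segment: the engine may keep fetching methods inside that segment for longer than limit_ms while gp_get never moves. Sketch of the distinction:

    /*
     * gpfifo:   [ entry0 ] [ entry1 ] ...     gp_get advances per entry
     *               |
     *               v
     * pushbuf:  [ ...... many methods ..... ] pb_get advances per fetch
     *
     * progress = (new_gp_get != gp_get) || (new_pb_get != pb_get);
     */

Either pointer advancing counts as progress and rewinds the watchdog; only when both are frozen past the deadline does the force-reset path run.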
diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c
index c8a2b8037..5770412a2 100644
--- a/drivers/gpu/nvgpu/common/rc/rc.c
+++ b/drivers/gpu/nvgpu/common/rc/rc.c
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include <nvgpu/watchdog.h>
 #include
 #include
 #include
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index f2b13287d..998d83e9b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -578,12 +578,6 @@ struct nvgpu_channel {
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 
-static inline struct nvgpu_channel *
-nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
-{
-	return (struct nvgpu_channel *)
-		((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
-};
 int nvgpu_channel_worker_init(struct gk20a *g);
 void nvgpu_channel_worker_deinit(struct gk20a *g);
 void nvgpu_channel_update(struct nvgpu_channel *c);
@@ -617,10 +611,6 @@ bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c);
 bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
 		u32 timeout_delta_ms, bool *progress);
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
-void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
-#endif
-
 #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
 
 static inline bool nvgpu_channel_is_deterministic(struct nvgpu_channel *c)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/watchdog.h b/drivers/gpu/nvgpu/include/nvgpu/watchdog.h
new file mode 100644
index 000000000..4b8c52c99
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/watchdog.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_WATCHDOG_H
+#define NVGPU_WATCHDOG_H
+
+struct gk20a;
+struct nvgpu_channel;
+struct nvgpu_worker;
+
+void nvgpu_channel_wdt_start(struct nvgpu_channel *ch);
+void nvgpu_channel_wdt_continue(struct nvgpu_channel *ch);
+bool nvgpu_channel_wdt_stop(struct nvgpu_channel *ch);
+void nvgpu_channel_wdt_check(struct nvgpu_channel *ch);
+
+void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
+
+#endif
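Reviewer note (illustration only, not part of the change): with the declaration now in nvgpu/watchdog.h, common/rc/rc.c can keep calling nvgpu_channel_wdt_restart_all_channels() during recovery. Per its doc comment, the call belongs where a global hang is detected, so channels that merely stalled behind the recovery work don't trip their own watchdogs. A hedged sketch of such a call site (the surrounding recovery function is hypothetical; only the include and the wdt call come from this diff):

    #include <nvgpu/watchdog.h>

    /* Hypothetical: somewhere in a global recovery path. */
    static void example_recover(struct gk20a *g)
    {
            /* ... reset/teardown work that stalls every channel ... */

            /* Give each serviceable channel a fresh watchdog window so
             * the stall just caused doesn't look like a per-channel hang. */
            nvgpu_channel_wdt_restart_all_channels(g);
    }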