gpu: nvgpu: decouple channel watchdog dependencies

The channel code needs the watchdog code and vice versa. Cut this
circular dependency with a few simplifications so that the watchdog no
longer depends on so much.

When calling watchdog APIs that store or compare channel progress,
provide a snapshot of the current progress instead of a whole channel
pointer. struct nvgpu_channel_wdt_state is added as the interface for
this; it tracks gp_get and pb_get, as sketched below.
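
The new snapshot type and the wdt entry points that now take it,
excerpted from the header change in the diff below:

    struct nvgpu_channel_wdt_state {
        u64 gp_get;
        u64 pb_get;
    };

    void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
            struct nvgpu_channel_wdt_state *state);
    bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
            struct nvgpu_channel_wdt_state *state);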

When periodically checking the watchdog state, make the channel code
ask whether a hang has been detected and, if so, abort the channel from
within channel code instead of having the watchdog do it. The debug
dump verbosity flag is also moved back to the channel data.
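
The periodic poll in channel code then reduces to roughly this (see
nvgpu_channel_check_wdt() in the diff below):

    struct nvgpu_channel_wdt_state state =
        nvgpu_channel_collect_wdt_state(ch);

    if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
        nvgpu_channel_recover_from_wdt(ch);
    }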

Move the functionality that restarts all channels' watchdogs from
watchdog code to channel code. Looping over active channels is not a
good fit for the watchdog; it's better for the channel handling to just
use the watchdog as a tracking tool.
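
The loop now lives in nvgpu_channel_restart_all_wdts() and only asks
the wdt to rewind its tracking per channel; slightly condensed from the
diff below:

    for (chid = 0; chid < f->num_channels; chid++) {
        struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

        if (ch != NULL) {
            if (ch->wdt != NULL &&
                !nvgpu_channel_check_unserviceable(ch)) {
                struct nvgpu_channel_wdt_state state =
                    nvgpu_channel_collect_wdt_state(ch);

                nvgpu_channel_wdt_rewind(ch->wdt, &state);
            }
            nvgpu_channel_put(ch);
        }
    }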

Move a few unserviceable checks up the stack to the callers of the wdt
code. They are a kludge, but this will do for now and demonstrates what
eventually needs to be fixed.

This does not leave much code in the watchdog unit. Its only purpose
now is to isolate the logic that couples a timer with progress
snapshots, using careful locking to start and stop the tracking.

Jira NVGPU-5582

Change-Id: I7c728542ff30d88b1414500210be3fbaf61e6e8a
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2369820
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:       Konsta Hölttä
Date:         2020-08-12 18:10:40 +03:00
Committed by: Alex Waterman
Parent:       281006ae7d
Commit:       e8201d6ce3
6 changed files with 214 additions and 125 deletions


@@ -459,6 +459,114 @@ nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
 };
 
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
+
+void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
+{
+	ch->wdt_debug_dump = dump;
+}
+
+static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
+		struct nvgpu_channel *ch)
+{
+	struct gk20a *g = ch->g;
+	struct nvgpu_channel_wdt_state state = { 0, 0 };
+
+	/*
+	 * Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
+	 * the moment because system suspend puts g->regs away but doesn't stop
+	 * the worker thread that runs the watchdog. This might need to be
+	 * cleared up in the future.
+	 */
+	if (nvgpu_channel_wdt_running(ch->wdt)) {
+		/*
+		 * Read the state only if the wdt is on to avoid unnecessary
+		 * accesses. The kernel mem for userd may not even exist; this
+		 * channel could be in usermode submit mode.
+		 */
+		state.gp_get = g->ops.userd.gp_get(g, ch);
+		state.pb_get = g->ops.userd.pb_get(g, ch);
+	}
+
+	return state;
+}
+
+static void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
+{
+	struct nvgpu_channel_wdt_state state =
+		nvgpu_channel_collect_wdt_state(ch);
+
+	/*
+	 * FIXME: channel recovery can race the submit path and can start even
+	 * after this, but this check is the best we can do for now.
+	 */
+	if (!nvgpu_channel_check_unserviceable(ch)) {
+		nvgpu_channel_wdt_start(ch->wdt, &state);
+	}
+}
+
+void nvgpu_channel_restart_all_wdts(struct gk20a *g)
+{
+	struct nvgpu_fifo *f = &g->fifo;
+	u32 chid;
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
+
+		if (ch != NULL) {
+			if ((ch->wdt != NULL) &&
+					!nvgpu_channel_check_unserviceable(ch)) {
+				struct nvgpu_channel_wdt_state state =
+					nvgpu_channel_collect_wdt_state(ch);
+
+				nvgpu_channel_wdt_rewind(ch->wdt, &state);
+			}
+			nvgpu_channel_put(ch);
+		}
+	}
+}
+
+static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
+{
+	struct gk20a *g = ch->g;
+
+	nvgpu_log_fn(g, " ");
+
+	if (nvgpu_channel_check_unserviceable(ch)) {
+		/* channel is already recovered */
+		nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
+		return;
+	}
+
+	nvgpu_err(g, "Job on channel %d timed out", ch->chid);
+
+	/* force reset calls gk20a_debug_dump but not this */
+	if (ch->wdt_debug_dump) {
+		gk20a_gr_debug_dump(g);
+	}
+
+#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
+	if (g->ops.tsg.force_reset(ch,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
+			ch->wdt_debug_dump) != 0) {
+		nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
+	}
+#endif
+}
+
+/*
+ * Test the watchdog progress. If the channel is stuck, reset it.
+ *
+ * The gpu is implicitly on at this point because the watchdog can only run on
+ * channels that have submitted jobs pending for cleanup.
+ */
+static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
+{
+	struct nvgpu_channel_wdt_state state =
+		nvgpu_channel_collect_wdt_state(ch);
+
+	if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
+		nvgpu_channel_recover_from_wdt(ch);
+	}
+}
+
 static void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
 {
 	struct nvgpu_channel_worker *ch_worker =
@@ -486,7 +594,7 @@ static void nvgpu_channel_poll_wdt(struct gk20a *g)
 		if (ch != NULL) {
 			if (!nvgpu_channel_check_unserviceable(ch)) {
-				nvgpu_channel_wdt_check(ch->wdt, ch);
+				nvgpu_channel_check_wdt(ch);
 			}
 			nvgpu_channel_put(ch);
 		}
@@ -521,6 +629,8 @@ static u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
 	return ch_worker->watchdog_interval;
 }
 
+#else
+static void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
 #endif /* CONFIG_NVGPU_CHANNEL_WDT */
 
 static inline struct nvgpu_channel *
@@ -635,7 +745,7 @@ int nvgpu_channel_add_job(struct nvgpu_channel *c,
 		job->num_mapped_buffers = num_mapped_buffers;
 		job->mapped_buffers = mapped_buffers;
 
-		nvgpu_channel_wdt_start(c->wdt, c);
+		nvgpu_channel_launch_wdt(c);
 
 		nvgpu_channel_joblist_lock(c);
 		nvgpu_channel_joblist_add(c, job);
@@ -1456,11 +1566,12 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
 	ch->unserviceable = true;
 
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
-	ch->wdt = nvgpu_channel_wdt_alloc(ch);
+	ch->wdt = nvgpu_channel_wdt_alloc(g);
 	if (ch->wdt == NULL) {
 		nvgpu_err(g, "wdt alloc failed");
 		goto clean_up;
 	}
+	ch->wdt_debug_dump = true;
 #endif
 
 	ch->obj_class = 0;


@@ -25,6 +25,7 @@
 #include <nvgpu/watchdog.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/watchdog.h>
+#include <nvgpu/string.h>
 
 struct nvgpu_channel_wdt {
 	struct gk20a *g;
@@ -33,18 +34,15 @@ struct nvgpu_channel_wdt {
 	struct nvgpu_spinlock lock;
 	struct nvgpu_timeout timer;
 	bool running;
-	u32 gp_get;
-	u64 pb_get;
+	struct nvgpu_channel_wdt_state ch_state;
 
 	/* lock not needed */
 	u32 limit_ms;
 	bool enabled;
-	bool debug_dump;
 };
 
-struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct nvgpu_channel *ch)
+struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g)
 {
-	struct gk20a *g = ch->g;
 	struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt));
 
 	if (wdt == NULL) {
@@ -55,7 +53,6 @@ struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct nvgpu_channel *ch)
 	nvgpu_spinlock_init(&wdt->lock);
 	wdt->enabled = true;
 	wdt->limit_ms = g->ch_wdt_init_limit_ms;
-	wdt->debug_dump = true;
 
 	return wdt;
 }
@@ -90,22 +87,12 @@ u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
 	return wdt->limit_ms;
 }
 
-void nvgpu_channel_wdt_set_debug_dump(struct nvgpu_channel_wdt *wdt, bool dump)
-{
-	wdt->debug_dump = dump;
-}
-
 static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch)
+		struct nvgpu_channel_wdt_state *state)
 {
 	struct gk20a *g = wdt->g;
 	int ret;
 
-	if (nvgpu_channel_check_unserviceable(ch)) {
-		wdt->running = false;
-		return;
-	}
-
 	ret = nvgpu_timeout_init(g, &wdt->timer,
 			wdt->limit_ms,
 			NVGPU_TIMER_CPU_TIMER);
@@ -114,8 +101,7 @@ static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
 		return;
 	}
 
-	wdt->gp_get = g->ops.userd.gp_get(g, ch);
-	wdt->pb_get = g->ops.userd.pb_get(g, ch);
+	wdt->ch_state = *state;
 	wdt->running = true;
 }
@@ -129,13 +115,9 @@ static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
  * If the timeout is already running, do nothing. This should be called when
  * new jobs are submitted. The timeout will stop when the last tracked job
  * finishes, making the channel idle.
- *
- * The channel's gpfifo read pointer will be used to determine if the job has
- * actually stuck at that time. After the timeout duration has expired, a
- * worker thread will consider the channel stuck and recover it if stuck.
  */
 void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch)
+		struct nvgpu_channel_wdt_state *state)
 {
 	if (!nvgpu_is_timeouts_enabled(wdt->g)) {
 		return;
@@ -151,7 +133,7 @@ void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
 		nvgpu_spinlock_release(&wdt->lock);
 		return;
 	}
 
-	nvgpu_channel_wdt_init(wdt, ch);
+	nvgpu_channel_wdt_init(wdt, state);
 
 	nvgpu_spinlock_release(&wdt->lock);
 }
@@ -203,103 +185,69 @@ void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt)
  * timeouts. Stopped timeouts can only be started (which is technically a
  * rewind too) or continued (where the stop is actually pause).
  */
-static void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch)
+void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state)
 {
 	nvgpu_spinlock_acquire(&wdt->lock);
 
 	if (wdt->running) {
-		nvgpu_channel_wdt_init(wdt, ch);
+		nvgpu_channel_wdt_init(wdt, state);
 	}
 	nvgpu_spinlock_release(&wdt->lock);
 }
 
 /**
- * Rewind the timeout on each non-dormant channel.
+ * Check if the watchdog is running.
  *
- * Reschedule the timeout of each active channel for which timeouts are running
- * as if something was happened on each channel right now. This should be
- * called when a global hang is detected that could cause a false positive on
- * other innocent channels.
+ * A running watchdog means one that is requested to run and expire in the
+ * future. The state of a running watchdog has to be checked periodically to
+ * see if it's expired.
  */
-void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g)
+bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt)
 {
-	struct nvgpu_fifo *f = &g->fifo;
-	u32 chid;
-
-	for (chid = 0; chid < f->num_channels; chid++) {
-		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
-
-		if (ch != NULL) {
-			if ((ch->wdt != NULL) &&
-					(!nvgpu_channel_check_unserviceable(ch))) {
-				nvgpu_channel_wdt_rewind(ch->wdt, ch);
-			}
-			nvgpu_channel_put(ch);
-		}
-	}
+	bool running;
+
+	nvgpu_spinlock_acquire(&wdt->lock);
+	running = wdt->running;
+	nvgpu_spinlock_release(&wdt->lock);
+
+	return running;
 }
 
 /**
- * Check if a timed out channel has hung and recover it if it has.
+ * Check if a channel has been stuck for the watchdog limit.
  *
  * Test if this channel has really got stuck at this point by checking if its
- * {gp,pb}_get has advanced or not. If no {gp,pb}_get action happened since
- * when the watchdog was started and it's timed out, force-reset the channel.
- *
- * The gpu is implicitly on at this point, because the watchdog can only run on
- * channels that have submitted jobs pending for cleanup.
+ * {gp,pb}_get have advanced or not. If progress was detected, start the timer
+ * from zero again. If no {gp,pb}_get action happened in the watchdog time
+ * limit, return true. Else return false.
  */
-static void nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch)
+static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state)
 {
 	struct gk20a *g = wdt->g;
-	u32 gp_get;
-	u32 new_gp_get;
-	u64 pb_get;
-	u64 new_pb_get;
+	struct nvgpu_channel_wdt_state previous_state;
 
 	nvgpu_log_fn(g, " ");
 
-	if (nvgpu_channel_check_unserviceable(ch)) {
-		/* channel is already recovered */
-		if (nvgpu_channel_wdt_stop(wdt) == true) {
-			nvgpu_info(g, "chid: %d unserviceable but wdt was ON",
-				ch->chid);
-		}
-		return;
-	}
-
 	/* Get status but keep timer running */
 	nvgpu_spinlock_acquire(&wdt->lock);
-	gp_get = wdt->gp_get;
-	pb_get = wdt->pb_get;
+	previous_state = wdt->ch_state;
 	nvgpu_spinlock_release(&wdt->lock);
 
-	new_gp_get = g->ops.userd.gp_get(g, ch);
-	new_pb_get = g->ops.userd.pb_get(g, ch);
-
-	if (new_gp_get != gp_get || new_pb_get != pb_get) {
+	if (nvgpu_memcmp((const u8 *)state,
+			(const u8 *)&previous_state,
+			sizeof(*state)) != 0) {
 		/* Channel has advanced, timer keeps going but resets */
-		nvgpu_channel_wdt_rewind(wdt, ch);
-	} else if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
-		/* Seems stuck but waiting to time out */
-	} else {
-		nvgpu_err(g, "Job on channel %d timed out", ch->chid);
-
-		/* force reset calls gk20a_debug_dump but not this */
-		if (wdt->debug_dump) {
-			gk20a_gr_debug_dump(g);
-		}
-
-#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
-		if (g->ops.tsg.force_reset(ch,
-			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
-			wdt->debug_dump) != 0) {
-			nvgpu_err(g, "failed tsg force reset for chid: %d",
-				ch->chid);
-		}
-#endif
+		nvgpu_channel_wdt_rewind(wdt, state);
+		return false;
 	}
+
+	if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
+		/* Seems stuck but waiting to time out */
+		return false;
+	}
+
+	return true;
 }
 
 /**
@@ -313,8 +261,8 @@ static void nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
  * The timeout is stopped (disabled) after the last job in a row finishes
  * and marks the channel idle.
  */
-void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch)
+bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state)
 {
 	bool running;
@@ -323,6 +271,8 @@ void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
 	nvgpu_spinlock_release(&wdt->lock);
 
 	if (running) {
-		nvgpu_channel_wdt_handler(wdt, ch);
+		return nvgpu_channel_wdt_handler(wdt, state);
+	} else {
+		return false;
 	}
 }


@@ -26,7 +26,6 @@
 #include <nvgpu/engines.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/channel.h>
-#include <nvgpu/watchdog.h>
 #include <nvgpu/tsg.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/nvgpu_err.h>
@@ -71,10 +70,12 @@ void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask,
 #ifdef CONFIG_NVGPU_RECOVERY
 	/*
-	 * Cancel all channels' wdt since ctxsw timeout might
-	 * trigger multiple watchdogs at a time
+	 * Cancel all channels' wdt since ctxsw timeout causes the runlist to
+	 * stuck and might falsely trigger multiple watchdogs at a time. We
+	 * won't detect proper wdt timeouts that would have happened, but if
+	 * they're stuck, they will trigger the wdt soon enough again.
 	 */
-	nvgpu_channel_wdt_restart_all_channels(g);
+	nvgpu_channel_restart_all_wdts(g);
 
 	nvgpu_rc_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true, debug_dump,
 			RC_TYPE_CTXSW_TIMEOUT);


@@ -377,6 +377,7 @@ struct nvgpu_channel {
 	/* kernel watchdog to kill stuck jobs */
 	struct nvgpu_channel_wdt *wdt;
+	bool wdt_debug_dump;
 
 	/** Fence allocator in case of deterministic submit. */
 	struct nvgpu_allocator fence_allocator;
@@ -1161,4 +1162,27 @@ int nvgpu_channel_deferred_reset_engines(struct gk20a *g,
 		struct nvgpu_channel *ch);
 #endif
 
+#ifdef CONFIG_NVGPU_CHANNEL_WDT
+/**
+ * @brief Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are running
+ * as if something was happened on each channel right now. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
+void nvgpu_channel_restart_all_wdts(struct gk20a *g);
+
+/**
+ * @brief Enable or disable full debug dump on wdt error.
+ *
+ * Set the policy on whether or not to do the verbose channel and gr debug dump
+ * when the channel gets recovered as a result of a watchdog timeout.
+ */
+void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump);
+#else
+static inline void nvgpu_channel_restart_all_wdts(struct gk20a *g) {}
+static inline void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch,
+		bool dump) {}
+#endif
+
 #endif


@@ -23,14 +23,19 @@
 #ifndef NVGPU_WATCHDOG_H
 #define NVGPU_WATCHDOG_H
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
+#include <nvgpu/types.h>
 
 struct gk20a;
-struct nvgpu_channel;
-struct nvgpu_worker;
 struct nvgpu_channel_wdt;
 
-struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct nvgpu_channel *ch);
+struct nvgpu_channel_wdt_state {
+	u64 gp_get;
+	u64 pb_get;
+};
+
+#ifdef CONFIG_NVGPU_CHANNEL_WDT
+
+struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g);
 void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt);
 
 void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt);
@@ -39,21 +44,21 @@ bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt);
 void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms);
 u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt);
-void nvgpu_channel_wdt_set_debug_dump(struct nvgpu_channel_wdt *wdt, bool dump);
 
 void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch);
-void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt);
+		struct nvgpu_channel_wdt_state *state);
 bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt);
-void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch);
-
-void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
+void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt);
+void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state);
+bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt);
+bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state);
 
 #else /* CONFIG_NVGPU_CHANNEL_WDT */
 
 static inline struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(
-		struct nvgpu_channel *ch)
+		struct gk20a *g)
 {
 	return NULL;
 }
@@ -71,21 +76,19 @@ static inline u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
 {
 	return 0U;
 }
-static inline void nvgpu_channel_wdt_set_debug_dump(
-		struct nvgpu_channel_wdt *wdt,
-		bool dump) {}
 static inline void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch) {}
-static inline void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt) {}
+		struct nvgpu_channel_wdt_state *state) {}
 static inline bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
 {
 	return false;
 }
-static inline void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
-		struct nvgpu_channel *ch) {}
-
-static inline void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g) {}
+static inline void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt) {}
+static inline void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state) {}
+static inline bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel_wdt_state *state) {
+	return false;
+}
 
 #endif /* CONFIG_NVGPU_CHANNEL_WDT */


@@ -322,7 +322,7 @@ static int gk20a_channel_set_wdt_status(struct nvgpu_channel *ch,
 	if (set_timeout)
 		nvgpu_channel_wdt_set_limit(ch->wdt, args->timeout_ms);
 
-	nvgpu_channel_wdt_set_debug_dump(ch->wdt, !disable_dump);
+	nvgpu_channel_set_wdt_debug_dump(ch, !disable_dump);
 
 	return 0;
 #else