Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 09:12:24 +03:00)
gpu: nvgpu: rename timeout of channel struct to wdt
Rename struct channel_gk20a_timeout to nvgpu_channel_wdt, rename the
timeout member of the channel_gk20a struct to wdt, rename
ch_wdt_timeout_ms to ch_wdt_init_limit_ms, and rename the
gk20a_channel_timeout_* functions to nvgpu_channel_wdt_*.

JIRA NVGPU-1312

Change-Id: Ida78426cc007b53f3d407cf85428d15f7fe7518a
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077641
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 737de7eac5
Commit: 9393e2a90a
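
Before the hunks, a sketch of the renamed per-channel watchdog state, assembled only from the fields this change touches. Field order and the width of pb_get are assumptions; the lock and timer declarations are verbatim from the header hunk below.

struct nvgpu_channel_wdt {
	/* lock protects the running timer state */
	struct nvgpu_spinlock lock;
	struct nvgpu_timeout timer;	/* CPU timer armed for limit_ms */
	bool enabled;		/* defaults to true; toggled by the WDT ioctl */
	bool running;		/* a submitted job is currently being watched */
	bool debug_dump;	/* dump debug info when the watchdog fires */
	u32 limit_ms;		/* seeded from g->ch_wdt_init_limit_ms */
	u32 gp_get;		/* GPFIFO get pointer snapshot */
	u64 pb_get;		/* pushbuffer get pointer snapshot (width assumed) */
};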
@@ -746,9 +746,9 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->unserviceable = false;
 
 	/* init kernel watchdog timeout */
-	ch->timeout.enabled = true;
-	ch->timeout.limit_ms = g->ch_wdt_timeout_ms;
-	ch->timeout.debug_dump = true;
+	ch->wdt.enabled = true;
+	ch->wdt.limit_ms = g->ch_wdt_init_limit_ms;
+	ch->wdt.debug_dump = true;
 
 	ch->obj_class = 0;
 	ch->subctx_id = 0;
@@ -1282,10 +1282,10 @@ int nvgpu_channel_setup_bind(struct channel_gk20a *c,
 		}
 	}
 
-	if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled) {
+	if (!nvgpu_is_timeouts_enabled(c->g) || !c->wdt.enabled) {
 		acquire_timeout = 0;
 	} else {
-		acquire_timeout = c->timeout.limit_ms;
+		acquire_timeout = c->wdt.limit_ms;
 	}
 
 	err = g->ops.fifo.setup_ramfc(c, gpfifo_gpu_va,
@@ -1507,20 +1507,20 @@ u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
 	return nvgpu_gp_free_count(c);
 }
 
-static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_init(struct channel_gk20a *ch)
 {
 	struct gk20a *g = ch->g;
 
 	if (gk20a_channel_check_unserviceable(ch)) {
-		ch->timeout.running = false;
+		ch->wdt.running = false;
 		return;
 	}
 
-	ch->timeout.gp_get = g->ops.userd.gp_get(g, ch);
-	ch->timeout.pb_get = g->ops.userd.pb_get(g, ch);
-	ch->timeout.running = true;
-	nvgpu_timeout_init(g, &ch->timeout.timer,
-			ch->timeout.limit_ms,
+	ch->wdt.gp_get = g->ops.userd.gp_get(g, ch);
+	ch->wdt.pb_get = g->ops.userd.pb_get(g, ch);
+	ch->wdt.running = true;
+	nvgpu_timeout_init(g, &ch->wdt.timer,
+			ch->wdt.limit_ms,
 			NVGPU_TIMER_CPU_TIMER);
 }
 
@@ -1539,24 +1539,24 @@ static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
  * actually stuck at that time. After the timeout duration has expired, a
  * worker thread will consider the channel stuck and recover it if stuck.
  */
-static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_start(struct channel_gk20a *ch)
 {
 	if (!nvgpu_is_timeouts_enabled(ch->g)) {
 		return;
 	}
 
-	if (!ch->timeout.enabled) {
+	if (!ch->wdt.enabled) {
 		return;
 	}
 
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
 
-	if (ch->timeout.running) {
-		nvgpu_spinlock_release(&ch->timeout.lock);
+	if (ch->wdt.running) {
+		nvgpu_spinlock_release(&ch->wdt.lock);
 		return;
 	}
-	__gk20a_channel_timeout_start(ch);
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_channel_wdt_init(ch);
+	nvgpu_spinlock_release(&ch->wdt.lock);
 }
 
 /**
@@ -1570,14 +1570,14 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
  * (This should be called from an update handler running in the same thread
  * with the watchdog.)
  */
-static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+static bool nvgpu_channel_wdt_stop(struct channel_gk20a *ch)
 {
 	bool was_running;
 
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
-	was_running = ch->timeout.running;
-	ch->timeout.running = false;
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	was_running = ch->wdt.running;
+	ch->wdt.running = false;
+	nvgpu_spinlock_release(&ch->wdt.lock);
 	return was_running;
 }
 
@@ -1590,11 +1590,11 @@ static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
  * (This should be called from an update handler running in the same thread
  * with the watchdog.)
  */
-static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_continue(struct channel_gk20a *ch)
 {
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
-	ch->timeout.running = true;
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	ch->wdt.running = true;
+	nvgpu_spinlock_release(&ch->wdt.lock);
 }
 
 /**
@@ -1607,13 +1607,13 @@ static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
  * timeouts. Stopped timeouts can only be started (which is technically a
  * rewind too) or continued (where the stop is actually pause).
  */
-static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_rewind(struct channel_gk20a *ch)
 {
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
-	if (ch->timeout.running) {
-		__gk20a_channel_timeout_start(ch);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	if (ch->wdt.running) {
+		nvgpu_channel_wdt_init(ch);
 	}
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_spinlock_release(&ch->wdt.lock);
 }
 
 /**
@@ -1624,7 +1624,7 @@ static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch)
  * called when a global hang is detected that could cause a false positive on
  * other innocent channels.
  */
-void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
+void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	u32 chid;
@@ -1634,7 +1634,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 
 		if (ch != NULL) {
 			if (!gk20a_channel_check_unserviceable(ch)) {
-				gk20a_channel_timeout_rewind(ch);
+				nvgpu_channel_wdt_rewind(ch);
 			}
 			gk20a_channel_put(ch);
 		}
@@ -1651,7 +1651,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
  * The gpu is implicitly on at this point, because the watchdog can only run on
  * channels that have submitted jobs pending for cleanup.
  */
-static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_handler(struct channel_gk20a *ch)
 {
 	struct gk20a *g = ch->g;
 	u32 gp_get;
@@ -1663,36 +1663,36 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 
 	if (gk20a_channel_check_unserviceable(ch)) {
 		/* channel is already recovered */
-		gk20a_channel_timeout_stop(ch);
+		nvgpu_channel_wdt_stop(ch);
 		return;
 	}
 
 	/* Get status but keep timer running */
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
-	gp_get = ch->timeout.gp_get;
-	pb_get = ch->timeout.pb_get;
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	gp_get = ch->wdt.gp_get;
+	pb_get = ch->wdt.pb_get;
+	nvgpu_spinlock_release(&ch->wdt.lock);
 
 	new_gp_get = g->ops.userd.gp_get(g, ch);
 	new_pb_get = g->ops.userd.pb_get(g, ch);
 
 	if (new_gp_get != gp_get || new_pb_get != pb_get) {
 		/* Channel has advanced, timer keeps going but resets */
-		gk20a_channel_timeout_rewind(ch);
-	} else if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) {
+		nvgpu_channel_wdt_rewind(ch);
+	} else if (nvgpu_timeout_peek_expired(&ch->wdt.timer) == 0) {
 		/* Seems stuck but waiting to time out */
 	} else {
 		nvgpu_err(g, "Job on channel %d timed out",
 			ch->chid);
 
 		/* force reset calls gk20a_debug_dump but not this */
-		if (ch->timeout.debug_dump) {
+		if (ch->wdt.debug_dump) {
 			gk20a_gr_debug_dump(g);
 		}
 
 		g->ops.fifo.force_reset_ch(ch,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
-			ch->timeout.debug_dump);
+			ch->wdt.debug_dump);
 	}
 }
 
@@ -1707,23 +1707,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
  * The timeout is stopped (disabled) after the last job in a row finishes
  * and marks the channel idle.
  */
-static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+static void nvgpu_channel_wdt_check(struct channel_gk20a *ch)
 {
 	bool running;
 
-	nvgpu_spinlock_acquire(&ch->timeout.lock);
-	running = ch->timeout.running;
-	nvgpu_spinlock_release(&ch->timeout.lock);
+	nvgpu_spinlock_acquire(&ch->wdt.lock);
+	running = ch->wdt.running;
+	nvgpu_spinlock_release(&ch->wdt.lock);
 
 	if (running) {
-		gk20a_channel_timeout_handler(ch);
+		nvgpu_channel_wdt_handler(ch);
 	}
 }
 
 /**
  * Loop every living channel, check timeouts and handle stuck channels.
  */
-static void gk20a_channel_poll_timeouts(struct gk20a *g)
+static void nvgpu_channel_poll_wdt(struct gk20a *g)
 {
 	unsigned int chid;
 
@@ -1733,7 +1733,7 @@ static void gk20a_channel_poll_timeouts(struct gk20a *g)
 
 		if (ch != NULL) {
 			if (!gk20a_channel_check_unserviceable(ch)) {
-				gk20a_channel_timeout_check(ch);
+				nvgpu_channel_wdt_check(ch);
 			}
 			gk20a_channel_put(ch);
 		}
@@ -1879,7 +1879,7 @@ static int gk20a_channel_poll_worker(void *arg)
 		}
 
 		if (nvgpu_timeout_peek_expired(&timeout) != 0) {
-			gk20a_channel_poll_timeouts(g);
+			nvgpu_channel_poll_wdt(g);
 			nvgpu_timeout_init(g, &timeout, watchdog_interval,
 					NVGPU_TIMER_CPU_TIMER);
 		}
@@ -2057,7 +2057,7 @@ int gk20a_channel_add_job(struct channel_gk20a *c,
 		job->num_mapped_buffers = num_mapped_buffers;
 		job->mapped_buffers = mapped_buffers;
 
-		gk20a_channel_timeout_start(c);
+		nvgpu_channel_wdt_start(c);
 
 		if (!pre_alloc_enabled) {
 			channel_gk20a_joblist_lock(c);
@@ -2124,7 +2124,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	 * anyway (this would be a no-op).
 	 */
 	if (clean_all) {
-		watchdog_on = gk20a_channel_timeout_stop(c);
+		watchdog_on = nvgpu_channel_wdt_stop(c);
 	}
 
 	/* Synchronize with abort cleanup that needs the jobs. */
@@ -2162,7 +2162,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 			 * later timeout is still used.
 			 */
 			if (clean_all && watchdog_on) {
-				gk20a_channel_timeout_continue(c);
+				nvgpu_channel_wdt_continue(c);
 			}
 			break;
 		}
@@ -2406,7 +2406,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	nvgpu_spinlock_init(&c->ref_actions_lock);
 #endif
 	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
-	nvgpu_spinlock_init(&c->timeout.lock);
+	nvgpu_spinlock_init(&c->wdt.lock);
 
 	nvgpu_init_list_node(&c->joblist.dynamic.jobs);
 	nvgpu_init_list_node(&c->dbg_s_list);
@@ -404,7 +404,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
 	 */
 	need_job_tracking = (fence_wait ||
 			fence_get ||
-			c->timeout.enabled ||
+			c->wdt.enabled ||
 			(nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)
				&& !c->deterministic) ||
 			!skip_buffer_refcounting);
@@ -442,7 +442,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
 		 */
 		need_deferred_cleanup = !c->deterministic ||
 					need_sync_framework ||
-					c->timeout.enabled ||
+					c->wdt.enabled ||
 					(nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) &&
 					!c->deterministic) ||
 					!skip_buffer_refcounting;
@@ -486,7 +486,7 @@ u32 gk20a_ce_create_context(struct gk20a *g,
 		err = -ENOMEM;
 		goto end;
 	}
-	ce_ctx->ch->timeout.enabled = false;
+	ce_ctx->ch->wdt.enabled = false;
 
 	/* bind the channel to the vm */
 	err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
@@ -1255,7 +1255,7 @@ bool gk20a_fifo_handle_sched_error(struct gk20a *g)
 		 * Cancel all channels' timeout since SCHED error might
 		 * trigger multiple watchdogs at a time
 		 */
-		gk20a_channel_timeout_restart_all_channels(g);
+		nvgpu_channel_wdt_restart_all_channels(g);
 		gk20a_fifo_recover(g, BIT(engine_id), id,
 				is_tsg, true, verbose,
 				RC_TYPE_CTXSW_TIMEOUT);
@@ -42,7 +42,7 @@ void gv100_apply_ctxsw_timeout_intr(struct gk20a *g)
 {
 	u32 timeout;
 
-	timeout = g->ch_wdt_timeout_ms*1000U;
+	timeout = g->ch_wdt_init_limit_ms*1000U;
 	timeout = scale_ptimer(timeout,
 			ptimer_scalingfactor10x(g->ptimer_src_freq));
 	timeout |= fifo_eng_timeout_detection_enabled_f();
@@ -343,7 +343,7 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g, u32 fifo_intr)
 				ms);
 
 			/* Cancel all channels' timeout */
-			gk20a_channel_timeout_restart_all_channels(g);
+			nvgpu_channel_wdt_restart_all_channels(g);
 			gk20a_fifo_recover(g, BIT32(active_eng_id),
 				tsgid, true, true, verbose,
 				RC_TYPE_CTXSW_TIMEOUT);
@@ -168,7 +168,7 @@ struct channel_gk20a_joblist {
 	struct nvgpu_mutex cleanup_lock;
 };
 
-struct channel_gk20a_timeout {
+struct nvgpu_channel_wdt {
 	/* lock protects the running timer state */
 	struct nvgpu_spinlock lock;
 	struct nvgpu_timeout timer;
@@ -276,7 +276,7 @@ struct channel_gk20a {
 	struct nvgpu_cond semaphore_wq;
 
 	/* kernel watchdog to kill stuck jobs */
-	struct channel_gk20a_timeout timeout;
+	struct nvgpu_channel_wdt wdt;
 
 	/* for job cleanup handling in the background worker */
 	struct nvgpu_list_node worker_item;
@@ -429,7 +429,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 int nvgpu_channel_setup_bind(struct channel_gk20a *c,
 		struct nvgpu_setup_bind_args *args);
 
-void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
+void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
 
 bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
 void channel_gk20a_joblist_lock(struct channel_gk20a *c);
@@ -1884,7 +1884,7 @@ struct gk20a {
 #endif
 	u32 gr_idle_timeout_default;
 	bool timeouts_disabled_by_user;
-	unsigned int ch_wdt_timeout_ms;
+	unsigned int ch_wdt_init_limit_ms;
 	u32 fifo_eng_timeout_us;
 
 	struct nvgpu_mutex power_lock;
@@ -1347,7 +1347,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 		goto err_get_gk20a_channel;
 	}
 
-	ch->timeout.enabled = false;
+	ch->wdt.enabled = false;
 
 	/* bind the channel to the vm */
 	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);
@@ -355,8 +355,8 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
 	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
 			l->debugfs, &gk20a_debug_trace_cmdbuf);
 
-	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
-			l->debugfs, &g->ch_wdt_timeout_ms);
+	debugfs_create_u32("ch_wdt_init_limit_ms", S_IRUGO|S_IWUSR,
+			l->debugfs, &g->ch_wdt_init_limit_ms);
 
 	debugfs_create_u32("disable_syncpoints", S_IRUGO,
 			l->debugfs, &g->disable_syncpoints);
@@ -122,7 +122,7 @@ static void nvgpu_init_timeout(struct gk20a *g)
 	} else {
 		g->gr_idle_timeout_default = (u32)ULONG_MAX;
 	}
-	g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
+	g->ch_wdt_init_limit_ms = platform->ch_wdt_init_limit_ms;
 	g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
 }
 
@@ -286,16 +286,16 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 			NVGPU_IOCTL_CHANNEL_ENABLE_WDT);
 
 	if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
-		ch->timeout.enabled = false;
+		ch->wdt.enabled = false;
 	else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
-		ch->timeout.enabled = true;
+		ch->wdt.enabled = true;
 	else
 		return -EINVAL;
 
 	if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT)
-		ch->timeout.limit_ms = args->timeout_ms;
+		ch->wdt.limit_ms = args->timeout_ms;
 
-	ch->timeout.debug_dump = (args->wdt_status &
+	ch->wdt.debug_dump = (args->wdt_status &
 		NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0;
 
 	return 0;
@@ -98,7 +98,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		.ch_wdt_init_limit_ms = 7000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -133,7 +133,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		.ch_wdt_init_limit_ms = 7000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -168,7 +168,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		.ch_wdt_init_limit_ms = 7000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -205,7 +205,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		.ch_wdt_init_limit_ms = 7000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -244,7 +244,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		/*
 		 * WAR: PCIE X1 is very slow, set to very high value till nvlink is up
 		 */
-		.ch_wdt_timeout_ms = 30000,
+		.ch_wdt_init_limit_ms = 30000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -280,7 +280,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		.ch_wdt_init_limit_ms = 7000,
 
 		.unify_address_spaces = true,
 		.honors_aperture = true,
@@ -135,7 +135,7 @@ struct gk20a_platform {
 	bool enable_mscg;
 
 	/* Timeout for per-channel watchdog (in mS) */
-	u32 ch_wdt_timeout_ms;
+	u32 ch_wdt_init_limit_ms;
 
 	/* Disable big page support */
 	bool disable_bigpage;
@@ -904,7 +904,7 @@ struct gk20a_platform gm20b_tegra_platform = {
 
 	.force_reset_in_do_idle = false,
 
-	.ch_wdt_timeout_ms = 5000,
+	.ch_wdt_init_limit_ms = 5000,
 
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
@@ -451,7 +451,7 @@ struct gk20a_platform gp10b_tegra_platform = {
 	/* ptimer src frequency in hz*/
 	.ptimer_src_freq = 31250000,
 
-	.ch_wdt_timeout_ms = 5000,
+	.ch_wdt_init_limit_ms = 5000,
 
 	.probe = gp10b_tegra_probe,
 	.late_probe = gp10b_tegra_late_probe,
@@ -224,7 +224,7 @@ struct gk20a_platform gv11b_tegra_platform = {
 	/* ptimer src frequency in hz*/
 	.ptimer_src_freq = 31250000,
 
-	.ch_wdt_timeout_ms = 5000,
+	.ch_wdt_init_limit_ms = 5000,
 
 	.probe = gv11b_tegra_probe,
 	.late_probe = gv11b_tegra_late_probe,
@@ -89,7 +89,7 @@ struct gk20a_platform gv11b_vgpu_tegra_platform = {
 	.can_blcg = false,
 	.can_elcg = false,
 
-	.ch_wdt_timeout_ms = 5000,
+	.ch_wdt_init_limit_ms = 5000,
 
 	.probe = gv11b_vgpu_probe,
 
@@ -79,7 +79,7 @@ struct gk20a_platform vgpu_tegra_platform = {
 	.can_blcg = false,
 	.can_elcg = false,
 
-	.ch_wdt_timeout_ms = 5000,
+	.ch_wdt_init_limit_ms = 5000,
 
 	.probe = gk20a_tegra_probe,
 
@@ -360,7 +360,7 @@ int vgpu_probe(struct platform_device *pdev)
 
 	nvgpu_spinlock_init(&gk20a->mc_enable_lock);
 
-	gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
+	gk20a->ch_wdt_init_limit_ms = platform->ch_wdt_init_limit_ms;
 
 	/* Initialize the platform interface. */
 	err = platform->probe(dev);
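
The rename is behavior-preserving: the watchdog still snapshots the channel's get pointers when a job is tracked, rewinds on forward progress, and recovers the channel once the limit expires with no progress. A minimal user-space model of that snapshot/compare/rewind pattern follows; every name in it is illustrative, and the real driver uses nvgpu_timeout_*, the wdt.lock spinlock, and g->ops.userd.gp_get()/pb_get() rather than the simulated clock and counter here.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct wdt_model {
	bool running;
	uint32_t gp_get;	/* last observed progress counter */
	uint64_t deadline_ms;	/* absolute expiry time */
};

static void wdt_start(struct wdt_model *w, uint64_t now_ms, uint32_t limit_ms,
		      uint32_t progress)
{
	w->gp_get = progress;
	w->deadline_ms = now_ms + limit_ms;
	w->running = true;
}

/* One poll iteration: returns true when the watched work counts as stuck. */
static bool wdt_check(struct wdt_model *w, uint64_t now_ms, uint32_t limit_ms,
		      uint32_t progress)
{
	if (!w->running) {
		return false;
	}
	if (progress != w->gp_get) {
		/* work advanced: keep watching, but rewind the deadline */
		wdt_start(w, now_ms, limit_ms, progress);
		return false;
	}
	return now_ms >= w->deadline_ms;
}

int main(void)
{
	struct wdt_model w = { 0 };
	uint32_t progress = 0;

	wdt_start(&w, 0, 5000, progress);
	progress++;	/* the job advances once, then stalls */
	printf("t=4000 stuck? %d\n", wdt_check(&w, 4000, 5000, progress));
	printf("t=8000 stuck? %d\n", wdt_check(&w, 8000, 5000, progress));
	printf("t=9001 stuck? %d\n", wdt_check(&w, 9001, 5000, progress));
	return 0;
}

Compiled as C99, this prints 0, 0, 1: the progress seen at t=4000 rewinds the deadline to t=9000, so a stuck job is reported only after the rewound deadline passes, mirroring the rewind semantics nvgpu_channel_wdt_handler implements above.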