gpu: nvgpu: rename timeout of channel struct to wdt

Rename channel_gk20a_timeout to nvgpu_channel_wdt.
Rename timeout variable of channel_gk20a struct to wdt.
Rename ch_wdt_timeout_ms to ch_wdt_init_limit_ms in struct gk20a and
struct gk20a_platform. The debugfs node of the same name is renamed too.

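Rename gk20a_channel_timeout_* to nvgpu_channel_wdt_*. Two helpers do not
follow that pattern exactly: __gk20a_channel_timeout_start becomes
nvgpu_channel_wdt_init, and gk20a_channel_poll_timeouts becomes
nvgpu_channel_poll_wdt.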

JIRA NVGPU-1312

Change-Id: Ida78426cc007b53f3d407cf85428d15f7fe7518a
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077641
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2019-03-20 11:35:22 -07:00
committed by mobile promotions
parent 737de7eac5
commit 9393e2a90a
20 changed files with 88 additions and 88 deletions

View File

@@ -746,9 +746,9 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
ch->unserviceable = false;
/* init kernel watchdog timeout */
ch->timeout.enabled = true;
ch->timeout.limit_ms = g->ch_wdt_timeout_ms;
ch->timeout.debug_dump = true;
ch->wdt.enabled = true;
ch->wdt.limit_ms = g->ch_wdt_init_limit_ms;
ch->wdt.debug_dump = true;
ch->obj_class = 0;
ch->subctx_id = 0;
@@ -1282,10 +1282,10 @@ int nvgpu_channel_setup_bind(struct channel_gk20a *c,
}
}
if (!nvgpu_is_timeouts_enabled(c->g) || !c->timeout.enabled) {
if (!nvgpu_is_timeouts_enabled(c->g) || !c->wdt.enabled) {
acquire_timeout = 0;
} else {
acquire_timeout = c->timeout.limit_ms;
acquire_timeout = c->wdt.limit_ms;
}
err = g->ops.fifo.setup_ramfc(c, gpfifo_gpu_va,
@@ -1507,20 +1507,20 @@ u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
return nvgpu_gp_free_count(c);
}
static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_init(struct channel_gk20a *ch)
{
struct gk20a *g = ch->g;
if (gk20a_channel_check_unserviceable(ch)) {
ch->timeout.running = false;
ch->wdt.running = false;
return;
}
ch->timeout.gp_get = g->ops.userd.gp_get(g, ch);
ch->timeout.pb_get = g->ops.userd.pb_get(g, ch);
ch->timeout.running = true;
nvgpu_timeout_init(g, &ch->timeout.timer,
ch->timeout.limit_ms,
ch->wdt.gp_get = g->ops.userd.gp_get(g, ch);
ch->wdt.pb_get = g->ops.userd.pb_get(g, ch);
ch->wdt.running = true;
nvgpu_timeout_init(g, &ch->wdt.timer,
ch->wdt.limit_ms,
NVGPU_TIMER_CPU_TIMER);
}
@@ -1539,24 +1539,24 @@ static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
* actually stuck at that time. After the timeout duration has expired, a
* worker thread will consider the channel stuck and recover it if stuck.
*/
static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_start(struct channel_gk20a *ch)
{
if (!nvgpu_is_timeouts_enabled(ch->g)) {
return;
}
if (!ch->timeout.enabled) {
if (!ch->wdt.enabled) {
return;
}
nvgpu_spinlock_acquire(&ch->timeout.lock);
nvgpu_spinlock_acquire(&ch->wdt.lock);
if (ch->timeout.running) {
nvgpu_spinlock_release(&ch->timeout.lock);
if (ch->wdt.running) {
nvgpu_spinlock_release(&ch->wdt.lock);
return;
}
__gk20a_channel_timeout_start(ch);
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_channel_wdt_init(ch);
nvgpu_spinlock_release(&ch->wdt.lock);
}
/**
@@ -1570,14 +1570,14 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
* (This should be called from an update handler running in the same thread
* with the watchdog.)
*/
static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
static bool nvgpu_channel_wdt_stop(struct channel_gk20a *ch)
{
bool was_running;
nvgpu_spinlock_acquire(&ch->timeout.lock);
was_running = ch->timeout.running;
ch->timeout.running = false;
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_spinlock_acquire(&ch->wdt.lock);
was_running = ch->wdt.running;
ch->wdt.running = false;
nvgpu_spinlock_release(&ch->wdt.lock);
return was_running;
}
@@ -1590,11 +1590,11 @@ static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
* (This should be called from an update handler running in the same thread
* with the watchdog.)
*/
static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_continue(struct channel_gk20a *ch)
{
nvgpu_spinlock_acquire(&ch->timeout.lock);
ch->timeout.running = true;
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_spinlock_acquire(&ch->wdt.lock);
ch->wdt.running = true;
nvgpu_spinlock_release(&ch->wdt.lock);
}
/**
@@ -1607,13 +1607,13 @@ static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
* timeouts. Stopped timeouts can only be started (which is technically a
* rewind too) or continued (where the stop is actually pause).
*/
static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_rewind(struct channel_gk20a *ch)
{
nvgpu_spinlock_acquire(&ch->timeout.lock);
if (ch->timeout.running) {
__gk20a_channel_timeout_start(ch);
nvgpu_spinlock_acquire(&ch->wdt.lock);
if (ch->wdt.running) {
nvgpu_channel_wdt_init(ch);
}
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_spinlock_release(&ch->wdt.lock);
}
/**
@@ -1624,7 +1624,7 @@ static void gk20a_channel_timeout_rewind(struct channel_gk20a *ch)
* called when a global hang is detected that could cause a false positive on
* other innocent channels.
*/
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g)
{
struct fifo_gk20a *f = &g->fifo;
u32 chid;
@@ -1634,7 +1634,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
if (ch != NULL) {
if (!gk20a_channel_check_unserviceable(ch)) {
gk20a_channel_timeout_rewind(ch);
nvgpu_channel_wdt_rewind(ch);
}
gk20a_channel_put(ch);
}
@@ -1651,7 +1651,7 @@ void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
* The gpu is implicitly on at this point, because the watchdog can only run on
* channels that have submitted jobs pending for cleanup.
*/
static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_handler(struct channel_gk20a *ch)
{
struct gk20a *g = ch->g;
u32 gp_get;
@@ -1663,36 +1663,36 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
if (gk20a_channel_check_unserviceable(ch)) {
/* channel is already recovered */
gk20a_channel_timeout_stop(ch);
nvgpu_channel_wdt_stop(ch);
return;
}
/* Get status but keep timer running */
nvgpu_spinlock_acquire(&ch->timeout.lock);
gp_get = ch->timeout.gp_get;
pb_get = ch->timeout.pb_get;
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_spinlock_acquire(&ch->wdt.lock);
gp_get = ch->wdt.gp_get;
pb_get = ch->wdt.pb_get;
nvgpu_spinlock_release(&ch->wdt.lock);
new_gp_get = g->ops.userd.gp_get(g, ch);
new_pb_get = g->ops.userd.pb_get(g, ch);
if (new_gp_get != gp_get || new_pb_get != pb_get) {
/* Channel has advanced, timer keeps going but resets */
gk20a_channel_timeout_rewind(ch);
} else if (nvgpu_timeout_peek_expired(&ch->timeout.timer) == 0) {
nvgpu_channel_wdt_rewind(ch);
} else if (nvgpu_timeout_peek_expired(&ch->wdt.timer) == 0) {
/* Seems stuck but waiting to time out */
} else {
nvgpu_err(g, "Job on channel %d timed out",
ch->chid);
/* force reset calls gk20a_debug_dump but not this */
if (ch->timeout.debug_dump) {
if (ch->wdt.debug_dump) {
gk20a_gr_debug_dump(g);
}
g->ops.fifo.force_reset_ch(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
ch->timeout.debug_dump);
ch->wdt.debug_dump);
}
}
@@ -1707,23 +1707,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
* The timeout is stopped (disabled) after the last job in a row finishes
* and marks the channel idle.
*/
static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
static void nvgpu_channel_wdt_check(struct channel_gk20a *ch)
{
bool running;
nvgpu_spinlock_acquire(&ch->timeout.lock);
running = ch->timeout.running;
nvgpu_spinlock_release(&ch->timeout.lock);
nvgpu_spinlock_acquire(&ch->wdt.lock);
running = ch->wdt.running;
nvgpu_spinlock_release(&ch->wdt.lock);
if (running) {
gk20a_channel_timeout_handler(ch);
nvgpu_channel_wdt_handler(ch);
}
}
/**
* Loop every living channel, check timeouts and handle stuck channels.
*/
static void gk20a_channel_poll_timeouts(struct gk20a *g)
static void nvgpu_channel_poll_wdt(struct gk20a *g)
{
unsigned int chid;
@@ -1733,7 +1733,7 @@ static void gk20a_channel_poll_timeouts(struct gk20a *g)
if (ch != NULL) {
if (!gk20a_channel_check_unserviceable(ch)) {
gk20a_channel_timeout_check(ch);
nvgpu_channel_wdt_check(ch);
}
gk20a_channel_put(ch);
}
@@ -1879,7 +1879,7 @@ static int gk20a_channel_poll_worker(void *arg)
}
if (nvgpu_timeout_peek_expired(&timeout) != 0) {
gk20a_channel_poll_timeouts(g);
nvgpu_channel_poll_wdt(g);
nvgpu_timeout_init(g, &timeout, watchdog_interval,
NVGPU_TIMER_CPU_TIMER);
}
@@ -2057,7 +2057,7 @@ int gk20a_channel_add_job(struct channel_gk20a *c,
job->num_mapped_buffers = num_mapped_buffers;
job->mapped_buffers = mapped_buffers;
gk20a_channel_timeout_start(c);
nvgpu_channel_wdt_start(c);
if (!pre_alloc_enabled) {
channel_gk20a_joblist_lock(c);
@@ -2124,7 +2124,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
* anyway (this would be a no-op).
*/
if (clean_all) {
watchdog_on = gk20a_channel_timeout_stop(c);
watchdog_on = nvgpu_channel_wdt_stop(c);
}
/* Synchronize with abort cleanup that needs the jobs. */
@@ -2162,7 +2162,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
* later timeout is still used.
*/
if (clean_all && watchdog_on) {
gk20a_channel_timeout_continue(c);
nvgpu_channel_wdt_continue(c);
}
break;
}
@@ -2406,7 +2406,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
nvgpu_spinlock_init(&c->ref_actions_lock);
#endif
nvgpu_spinlock_init(&c->joblist.dynamic.lock);
nvgpu_spinlock_init(&c->timeout.lock);
nvgpu_spinlock_init(&c->wdt.lock);
nvgpu_init_list_node(&c->joblist.dynamic.jobs);
nvgpu_init_list_node(&c->dbg_s_list);

View File

@@ -404,7 +404,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
*/
need_job_tracking = (fence_wait ||
fence_get ||
c->timeout.enabled ||
c->wdt.enabled ||
(nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)
&& !c->deterministic) ||
!skip_buffer_refcounting);
@@ -442,7 +442,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
*/
need_deferred_cleanup = !c->deterministic ||
need_sync_framework ||
c->timeout.enabled ||
c->wdt.enabled ||
(nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) &&
!c->deterministic) ||
!skip_buffer_refcounting;

View File

@@ -486,7 +486,7 @@ u32 gk20a_ce_create_context(struct gk20a *g,
err = -ENOMEM;
goto end;
}
ce_ctx->ch->timeout.enabled = false;
ce_ctx->ch->wdt.enabled = false;
/* bind the channel to the vm */
err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);

View File

@@ -1255,7 +1255,7 @@ bool gk20a_fifo_handle_sched_error(struct gk20a *g)
* Cancel all channels' timeout since SCHED error might
* trigger multiple watchdogs at a time
*/
gk20a_channel_timeout_restart_all_channels(g);
nvgpu_channel_wdt_restart_all_channels(g);
gk20a_fifo_recover(g, BIT(engine_id), id,
is_tsg, true, verbose,
RC_TYPE_CTXSW_TIMEOUT);

View File

@@ -42,7 +42,7 @@ void gv100_apply_ctxsw_timeout_intr(struct gk20a *g)
{
u32 timeout;
timeout = g->ch_wdt_timeout_ms*1000U;
timeout = g->ch_wdt_init_limit_ms*1000U;
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
timeout |= fifo_eng_timeout_detection_enabled_f();

View File

@@ -343,7 +343,7 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g, u32 fifo_intr)
ms);
/* Cancel all channels' timeout */
gk20a_channel_timeout_restart_all_channels(g);
nvgpu_channel_wdt_restart_all_channels(g);
gk20a_fifo_recover(g, BIT32(active_eng_id),
tsgid, true, true, verbose,
RC_TYPE_CTXSW_TIMEOUT);

View File

@@ -168,7 +168,7 @@ struct channel_gk20a_joblist {
struct nvgpu_mutex cleanup_lock;
};
struct channel_gk20a_timeout {
struct nvgpu_channel_wdt {
/* lock protects the running timer state */
struct nvgpu_spinlock lock;
struct nvgpu_timeout timer;
@@ -276,7 +276,7 @@ struct channel_gk20a {
struct nvgpu_cond semaphore_wq;
/* kernel watchdog to kill stuck jobs */
struct channel_gk20a_timeout timeout;
struct nvgpu_channel_wdt wdt;
/* for job cleanup handling in the background worker */
struct nvgpu_list_node worker_item;
@@ -429,7 +429,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
int nvgpu_channel_setup_bind(struct channel_gk20a *c,
struct nvgpu_setup_bind_args *args);
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);
void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c);
void channel_gk20a_joblist_lock(struct channel_gk20a *c);

View File

@@ -1884,7 +1884,7 @@ struct gk20a {
#endif
u32 gr_idle_timeout_default;
bool timeouts_disabled_by_user;
unsigned int ch_wdt_timeout_ms;
unsigned int ch_wdt_init_limit_ms;
u32 fifo_eng_timeout_us;
struct nvgpu_mutex power_lock;

View File

@@ -1347,7 +1347,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
goto err_get_gk20a_channel;
}
ch->timeout.enabled = false;
ch->wdt.enabled = false;
/* bind the channel to the vm */
err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);

View File

@@ -355,8 +355,8 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
l->debugfs, &gk20a_debug_trace_cmdbuf);
debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
l->debugfs, &g->ch_wdt_timeout_ms);
debugfs_create_u32("ch_wdt_init_limit_ms", S_IRUGO|S_IWUSR,
l->debugfs, &g->ch_wdt_init_limit_ms);
debugfs_create_u32("disable_syncpoints", S_IRUGO,
l->debugfs, &g->disable_syncpoints);

View File

@@ -122,7 +122,7 @@ static void nvgpu_init_timeout(struct gk20a *g)
} else {
g->gr_idle_timeout_default = (u32)ULONG_MAX;
}
g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
g->ch_wdt_init_limit_ms = platform->ch_wdt_init_limit_ms;
g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
}

View File

@@ -286,16 +286,16 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
NVGPU_IOCTL_CHANNEL_ENABLE_WDT);
if (status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
ch->timeout.enabled = false;
ch->wdt.enabled = false;
else if (status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
ch->timeout.enabled = true;
ch->wdt.enabled = true;
else
return -EINVAL;
if (args->wdt_status & NVGPU_IOCTL_CHANNEL_WDT_FLAG_SET_TIMEOUT)
ch->timeout.limit_ms = args->timeout_ms;
ch->wdt.limit_ms = args->timeout_ms;
ch->timeout.debug_dump = (args->wdt_status &
ch->wdt.debug_dump = (args->wdt_status &
NVGPU_IOCTL_CHANNEL_WDT_FLAG_DISABLE_DUMP) == 0;
return 0;

View File

@@ -98,7 +98,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.ch_wdt_init_limit_ms = 7000,
.unify_address_spaces = true,
.honors_aperture = true,
@@ -133,7 +133,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.ch_wdt_init_limit_ms = 7000,
.unify_address_spaces = true,
.honors_aperture = true,
@@ -168,7 +168,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.ch_wdt_init_limit_ms = 7000,
.unify_address_spaces = true,
.honors_aperture = true,
@@ -205,7 +205,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.ch_wdt_init_limit_ms = 7000,
.unify_address_spaces = true,
.honors_aperture = true,
@@ -244,7 +244,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
/*
* WAR: PCIE X1 is very slow, set to very high value till nvlink is up
*/
.ch_wdt_timeout_ms = 30000,
.ch_wdt_init_limit_ms = 30000,
.unify_address_spaces = true,
.honors_aperture = true,
@@ -280,7 +280,7 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.ch_wdt_init_limit_ms = 7000,
.unify_address_spaces = true,
.honors_aperture = true,

View File

@@ -135,7 +135,7 @@ struct gk20a_platform {
bool enable_mscg;
/* Timeout for per-channel watchdog (in mS) */
u32 ch_wdt_timeout_ms;
u32 ch_wdt_init_limit_ms;
/* Disable big page support */
bool disable_bigpage;

View File

@@ -904,7 +904,7 @@ struct gk20a_platform gm20b_tegra_platform = {
.force_reset_in_do_idle = false,
.ch_wdt_timeout_ms = 5000,
.ch_wdt_init_limit_ms = 5000,
.probe = gk20a_tegra_probe,
.late_probe = gk20a_tegra_late_probe,

View File

@@ -451,7 +451,7 @@ struct gk20a_platform gp10b_tegra_platform = {
/* ptimer src frequency in hz*/
.ptimer_src_freq = 31250000,
.ch_wdt_timeout_ms = 5000,
.ch_wdt_init_limit_ms = 5000,
.probe = gp10b_tegra_probe,
.late_probe = gp10b_tegra_late_probe,

View File

@@ -224,7 +224,7 @@ struct gk20a_platform gv11b_tegra_platform = {
/* ptimer src frequency in hz*/
.ptimer_src_freq = 31250000,
.ch_wdt_timeout_ms = 5000,
.ch_wdt_init_limit_ms = 5000,
.probe = gv11b_tegra_probe,
.late_probe = gv11b_tegra_late_probe,

View File

@@ -89,7 +89,7 @@ struct gk20a_platform gv11b_vgpu_tegra_platform = {
.can_blcg = false,
.can_elcg = false,
.ch_wdt_timeout_ms = 5000,
.ch_wdt_init_limit_ms = 5000,
.probe = gv11b_vgpu_probe,

View File

@@ -79,7 +79,7 @@ struct gk20a_platform vgpu_tegra_platform = {
.can_blcg = false,
.can_elcg = false,
.ch_wdt_timeout_ms = 5000,
.ch_wdt_init_limit_ms = 5000,
.probe = gk20a_tegra_probe,

View File

@@ -360,7 +360,7 @@ int vgpu_probe(struct platform_device *pdev)
nvgpu_spinlock_init(&gk20a->mc_enable_lock);
gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
gk20a->ch_wdt_init_limit_ms = platform->ch_wdt_init_limit_ms;
/* Initialize the platform interface. */
err = platform->probe(dev);