gpu: nvgpu: wait for engines to go idle before suspend

Wait for the PBDMAs and engines to go idle so that pending tasks complete
before suspending.

Updated the logic in gk20a_fifo_wait_engine_idle to also consider the ctxsw
status. Updated the PBDMA idle check to look at the PBDMA channel status and
the pb/gp get/put pointers.

Bug 3789519
Bug 3832838

Change-Id: Ifd105bbb305eaf358423281b192f67d782d773a4
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2870162
Reviewed-by: Martin Radev <mradev@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Sagar Kamble
2023-03-13 14:06:57 +05:30
committed by mobile promotions
parent 5a2ed4df76
commit 3b414dbf07
3 changed files with 159 additions and 20 deletions

View File

@@ -1,7 +1,7 @@
/* /*
* GK20A Graphics FIFO (gr host) * GK20A Graphics FIFO (gr host)
* *
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -3928,10 +3928,59 @@ bool gk20a_fifo_is_engine_busy(struct gk20a *g)
return false; return false;
} }
int gk20a_fifo_wait_engine_idle(struct gk20a *g) int gk20a_fifo_wait_engine_id_idle(struct gk20a *g, u32 engine_id)
{ {
struct nvgpu_timeout timeout; struct nvgpu_timeout timeout;
unsigned long delay = GR_IDLE_CHECK_DEFAULT; unsigned long delay = GR_IDLE_CHECK_DEFAULT;
int ret = -ETIMEDOUT;
u32 host_num_engines;
bool ctxsw_active, ctx_status_invalid, engine_busy;
u32 status;
nvgpu_log_fn(g, " ");
host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
if (engine_id >= host_num_engines) {
nvgpu_err(g, "Invalid engine ID");
return -EINVAL;
}
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
do {
status = gk20a_readl(g, fifo_engine_status_r(engine_id));
ctxsw_active = status &
fifo_engine_status_ctxsw_in_progress_f();
ctx_status_invalid =
(fifo_engine_status_ctx_status_v(status) ==
fifo_engine_status_ctx_status_invalid_v());
engine_busy = fifo_engine_status_engine_v(status);
if (ctx_status_invalid || (!engine_busy && !ctxsw_active)) {
nvgpu_log_fn(g, "done");
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle engine %u %x", engine_id, status);
}
nvgpu_log_fn(g, "done");
return ret;
}
int gk20a_fifo_wait_engine_idle(struct gk20a *g)
{
int ret = -ETIMEDOUT; int ret = -ETIMEDOUT;
u32 i, host_num_engines; u32 i, host_num_engines;
@@ -3940,24 +3989,9 @@ int gk20a_fifo_wait_engine_idle(struct gk20a *g)
host_num_engines = host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
for (i = 0; i < host_num_engines; i++) { for (i = 0; i < host_num_engines; i++) {
do { ret = gk20a_fifo_wait_engine_id_idle(g, i);
u32 status = gk20a_readl(g, fifo_engine_status_r(i)); if (ret != 0) {
if (!fifo_engine_status_engine_v(status)) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle engine %u", i);
break; break;
} }
} }
@@ -4171,6 +4205,68 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
nvgpu_kfree(g, ch_state); nvgpu_kfree(g, ch_state);
} }
int gk20a_fifo_wait_pbdma_idle(struct gk20a *g, u32 pbdma_id)
{
struct nvgpu_timeout timeout;
unsigned long delay = GR_IDLE_CHECK_DEFAULT;
int ret = -ETIMEDOUT;
u64 pbdma_get, pbdma_put;
u32 gp_get, gp_put;
u32 host_num_pbdma;
u32 chan_status;
u32 status;
nvgpu_log_fn(g, " ");
host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
if (pbdma_id >= host_num_pbdma) {
nvgpu_err(g, "invalid pbdma id %u", pbdma_id);
return -EINVAL;
}
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
do {
status = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id));
chan_status = fifo_pbdma_status_chan_status_v(status);
if (!chan_status) {
ret = 0;
break;
}
pbdma_put = (u64)gk20a_readl(g, pbdma_put_r(pbdma_id)) +
((u64)gk20a_readl(g, pbdma_put_hi_r(pbdma_id)) << 32ULL);
pbdma_get = (u64)gk20a_readl(g, pbdma_get_r(pbdma_id)) +
((u64)gk20a_readl(g, pbdma_get_hi_r(pbdma_id)) << 32ULL);
gp_put = gk20a_readl(g, pbdma_gp_put_r(pbdma_id));
gp_get = gk20a_readl(g, pbdma_gp_get_r(pbdma_id));
if ((pbdma_get == pbdma_put) && (gp_get == gp_put)) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle pbdma %u status: %x "
"pbdma_get: %llx pbdma_put: %llx gp_get: %x gp_put: %x",
pbdma_id, status, pbdma_get, pbdma_put, gp_get, gp_put);
}
nvgpu_log_fn(g, "done");
return ret;
}
void gk20a_dump_pbdma_status(struct gk20a *g, void gk20a_dump_pbdma_status(struct gk20a *g,
struct gk20a_debug_output *o) struct gk20a_debug_output *o)
{ {

View File

@@ -1,7 +1,7 @@
/* /*
* GK20A graphics fifo (gr host) * GK20A graphics fifo (gr host)
* *
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -278,6 +278,7 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch);
void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
unsigned long fault_id); unsigned long fault_id);
int gk20a_fifo_wait_engine_id_idle(struct gk20a *g, u32 engine_id);
int gk20a_fifo_wait_engine_idle(struct gk20a *g); int gk20a_fifo_wait_engine_idle(struct gk20a *g);
bool gk20a_fifo_is_engine_busy(struct gk20a *g); bool gk20a_fifo_is_engine_busy(struct gk20a *g);
u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
@@ -369,6 +370,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
struct ch_state *ch_state); struct ch_state *ch_state);
void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
int gk20a_fifo_wait_pbdma_idle(struct gk20a *g, u32 pbdma_id);
void gk20a_dump_pbdma_status(struct gk20a *g, void gk20a_dump_pbdma_status(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
void gk20a_dump_eng_status(struct gk20a *g, void gk20a_dump_eng_status(struct gk20a *g,

View File

@@ -400,6 +400,40 @@ static int gk20a_lockout_registers(struct gk20a *g)
return 0; return 0;
} }
static int gk20a_fifo_wait_engines_idle(struct gk20a *g)
{
u32 engine_id_idx;
u32 active_engine_id = 0;
int ret;
nvgpu_log_fn(g, " ");
for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
active_engine_id = g->fifo.active_engines_list[engine_id_idx];
ret = gk20a_fifo_wait_pbdma_idle(g,
g->fifo.engine_info[active_engine_id].pbdma_id);
if (ret != 0) {
nvgpu_log_info(g, "failed to idle the pbdma");
ret = -EAGAIN;
goto done;
}
ret = gk20a_fifo_wait_engine_id_idle(g,
g->fifo.engine_info[active_engine_id].engine_id);
if (ret != 0) {
nvgpu_log_info(g, "failed to idle the engine");
ret = -EAGAIN;
goto done;
}
}
done:
nvgpu_log_fn(g, "done");
return ret;
}
static int gk20a_pm_prepare_poweroff(struct device *dev) static int gk20a_pm_prepare_poweroff(struct device *dev)
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
@@ -419,6 +453,13 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
nvgpu_hide_usermode_for_poweroff(g); nvgpu_hide_usermode_for_poweroff(g);
ret = gk20a_fifo_wait_engines_idle(g);
if (ret) {
nvgpu_err(g, "failed to idle engines");
nvgpu_restore_usermode_for_poweron(g);
goto done;
}
gk20a_scale_suspend(dev); gk20a_scale_suspend(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE #ifdef CONFIG_NVGPU_SUPPORT_CDE