gpu: nvgpu: wait for engines to go idle before suspend

Wait for the PBDMAs and engines to go idle so that pending tasks complete
before suspending.

Updated the logic in gk20a_fifo_wait_engine_idle to also consider the ctxsw
status. Updated the PBDMA idle check to look at the PBDMA channel status and
the pb/gp get/put pointers.

Bug 3789519
Bug 3832838

Change-Id: Ifd105bbb305eaf358423281b192f67d782d773a4
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2870162
Reviewed-by: Martin Radev <mradev@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Sagar Kamble
2023-03-13 14:06:57 +05:30
committed by mobile promotions
parent 5a2ed4df76
commit 3b414dbf07
3 changed files with 159 additions and 20 deletions

View File

@@ -1,7 +1,7 @@
/* /*
* GK20A Graphics FIFO (gr host) * GK20A Graphics FIFO (gr host)
* *
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -3928,10 +3928,59 @@ bool gk20a_fifo_is_engine_busy(struct gk20a *g)
return false; return false;
} }
int gk20a_fifo_wait_engine_idle(struct gk20a *g) int gk20a_fifo_wait_engine_id_idle(struct gk20a *g, u32 engine_id)
{ {
struct nvgpu_timeout timeout; struct nvgpu_timeout timeout;
unsigned long delay = GR_IDLE_CHECK_DEFAULT; unsigned long delay = GR_IDLE_CHECK_DEFAULT;
int ret = -ETIMEDOUT;
u32 host_num_engines;
bool ctxsw_active, ctx_status_invalid, engine_busy;
u32 status;
nvgpu_log_fn(g, " ");
host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
if (engine_id >= host_num_engines) {
nvgpu_err(g, "Invalid engine ID");
return -EINVAL;
}
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
do {
status = gk20a_readl(g, fifo_engine_status_r(engine_id));
ctxsw_active = status &
fifo_engine_status_ctxsw_in_progress_f();
ctx_status_invalid =
(fifo_engine_status_ctx_status_v(status) ==
fifo_engine_status_ctx_status_invalid_v());
engine_busy = fifo_engine_status_engine_v(status);
if (ctx_status_invalid || (!engine_busy && !ctxsw_active)) {
nvgpu_log_fn(g, "done");
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle engine %u %x", engine_id, status);
}
nvgpu_log_fn(g, "done");
return ret;
}
int gk20a_fifo_wait_engine_idle(struct gk20a *g)
{
int ret = -ETIMEDOUT; int ret = -ETIMEDOUT;
u32 i, host_num_engines; u32 i, host_num_engines;
@@ -3940,24 +3989,9 @@ int gk20a_fifo_wait_engine_idle(struct gk20a *g)
host_num_engines = host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
for (i = 0; i < host_num_engines; i++) { for (i = 0; i < host_num_engines; i++) {
do { ret = gk20a_fifo_wait_engine_id_idle(g, i);
u32 status = gk20a_readl(g, fifo_engine_status_r(i)); if (ret != 0) {
if (!fifo_engine_status_engine_v(status)) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle engine %u", i);
break; break;
} }
} }
@@ -4171,6 +4205,68 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
nvgpu_kfree(g, ch_state); nvgpu_kfree(g, ch_state);
} }
int gk20a_fifo_wait_pbdma_idle(struct gk20a *g, u32 pbdma_id)
{
struct nvgpu_timeout timeout;
unsigned long delay = GR_IDLE_CHECK_DEFAULT;
int ret = -ETIMEDOUT;
u64 pbdma_get, pbdma_put;
u32 gp_get, gp_put;
u32 host_num_pbdma;
u32 chan_status;
u32 status;
nvgpu_log_fn(g, " ");
host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
if (pbdma_id >= host_num_pbdma) {
nvgpu_err(g, "invalid pbdma id %u", pbdma_id);
return -EINVAL;
}
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
do {
status = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id));
chan_status = fifo_pbdma_status_chan_status_v(status);
if (!chan_status) {
ret = 0;
break;
}
pbdma_put = (u64)gk20a_readl(g, pbdma_put_r(pbdma_id)) +
((u64)gk20a_readl(g, pbdma_put_hi_r(pbdma_id)) << 32ULL);
pbdma_get = (u64)gk20a_readl(g, pbdma_get_r(pbdma_id)) +
((u64)gk20a_readl(g, pbdma_get_hi_r(pbdma_id)) << 32ULL);
gp_put = gk20a_readl(g, pbdma_gp_put_r(pbdma_id));
gp_get = gk20a_readl(g, pbdma_gp_get_r(pbdma_id));
if ((pbdma_get == pbdma_put) && (gp_get == gp_put)) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2);
delay = min_t(unsigned long,
delay << 1, GR_IDLE_CHECK_MAX);
} while (!nvgpu_timeout_expired(&timeout));
if (ret) {
nvgpu_log_info(g, "cannot idle pbdma %u status: %x "
"pbdma_get: %llx pbdma_put: %llx gp_get: %x gp_put: %x",
pbdma_id, status, pbdma_get, pbdma_put, gp_get, gp_put);
}
nvgpu_log_fn(g, "done");
return ret;
}
void gk20a_dump_pbdma_status(struct gk20a *g, void gk20a_dump_pbdma_status(struct gk20a *g,
struct gk20a_debug_output *o) struct gk20a_debug_output *o)
{ {

View File

@@ -1,7 +1,7 @@
/* /*
* GK20A graphics fifo (gr host) * GK20A graphics fifo (gr host)
* *
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -278,6 +278,7 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch);
void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
unsigned long fault_id); unsigned long fault_id);
int gk20a_fifo_wait_engine_id_idle(struct gk20a *g, u32 engine_id);
int gk20a_fifo_wait_engine_idle(struct gk20a *g); int gk20a_fifo_wait_engine_idle(struct gk20a *g);
bool gk20a_fifo_is_engine_busy(struct gk20a *g); bool gk20a_fifo_is_engine_busy(struct gk20a *g);
u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g); u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
@@ -369,6 +370,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
struct ch_state *ch_state); struct ch_state *ch_state);
void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
int gk20a_fifo_wait_pbdma_idle(struct gk20a *g, u32 pbdma_id);
void gk20a_dump_pbdma_status(struct gk20a *g, void gk20a_dump_pbdma_status(struct gk20a *g,
struct gk20a_debug_output *o); struct gk20a_debug_output *o);
void gk20a_dump_eng_status(struct gk20a *g, void gk20a_dump_eng_status(struct gk20a *g,

View File

@@ -400,6 +400,40 @@ static int gk20a_lockout_registers(struct gk20a *g)
return 0; return 0;
} }
static int gk20a_fifo_wait_engines_idle(struct gk20a *g)
{
u32 engine_id_idx;
u32 active_engine_id = 0;
int ret;
nvgpu_log_fn(g, " ");
for (engine_id_idx = 0; engine_id_idx < g->fifo.num_engines; engine_id_idx++) {
active_engine_id = g->fifo.active_engines_list[engine_id_idx];
ret = gk20a_fifo_wait_pbdma_idle(g,
g->fifo.engine_info[active_engine_id].pbdma_id);
if (ret != 0) {
nvgpu_log_info(g, "failed to idle the pbdma");
ret = -EAGAIN;
goto done;
}
ret = gk20a_fifo_wait_engine_id_idle(g,
g->fifo.engine_info[active_engine_id].engine_id);
if (ret != 0) {
nvgpu_log_info(g, "failed to idle the engine");
ret = -EAGAIN;
goto done;
}
}
done:
nvgpu_log_fn(g, "done");
return ret;
}
static int gk20a_pm_prepare_poweroff(struct device *dev) static int gk20a_pm_prepare_poweroff(struct device *dev)
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
@@ -419,6 +453,13 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
nvgpu_hide_usermode_for_poweroff(g); nvgpu_hide_usermode_for_poweroff(g);
ret = gk20a_fifo_wait_engines_idle(g);
if (ret) {
nvgpu_err(g, "failed to idle engines");
nvgpu_restore_usermode_for_poweron(g);
goto done;
}
gk20a_scale_suspend(dev); gk20a_scale_suspend(dev);
#ifdef CONFIG_NVGPU_SUPPORT_CDE #ifdef CONFIG_NVGPU_SUPPORT_CDE