gpu: nvgpu: Track also pushbuf get for watchdog

Make the watchdog also notice fine-grained progress within a single pushbuffer. When only the gpfifo get pointer was tracked, the watchdog could fire even though the channel was not actually stuck but was still processing a relatively large or slow pushbuf. Jira NVGPU-72 Change-Id: I15374eea5d9abc9d3725a79d0b960503237e478c Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: http://git-master/r/1485919 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-25 02:52:51 +03:00 · 2017-05-19 15:25:44 +03:00
parent b70bad4b9f
commit f044418427
6 changed files with 24 additions and 1 deletion
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1482,6 +1482,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	ch->timeout.gp_get = ch->g->ops.fifo.userd_gp_get(ch->g, ch);
+	ch->timeout.pb_get = ch->g->ops.fifo.userd_pb_get(ch->g, ch);
 	ch->timeout.running = true;
 	nvgpu_timeout_init(ch->g, &ch->timeout.timer,
 			gk20a_get_channel_watchdog_timeout(ch),
@@ -1602,16 +1603,23 @@ static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
 	struct gk20a *g = ch->g;
 	u32 gp_get;
+	u32 new_gp_get;
+	u64 pb_get;
+	u64 new_pb_get;

 	gk20a_dbg_fn("");

 	/* Get status and clear the timer */
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 	gp_get = ch->timeout.gp_get;
+	pb_get = ch->timeout.pb_get;
 	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);

-	if (g->ops.fifo.userd_gp_get(ch->g, ch) != gp_get) {
+	new_gp_get = g->ops.fifo.userd_gp_get(ch->g, ch);
+	new_pb_get = g->ops.fifo.userd_pb_get(ch->g, ch);
+
+	if (new_gp_get != gp_get || new_pb_get != pb_get) {
 		/* Channel has advanced, reschedule */
 		gk20a_channel_timeout_start(ch);
 		return;
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -104,6 +104,7 @@ struct channel_gk20a_timeout {
 	struct nvgpu_timeout timer;
 	bool running;
 	u32 gp_get;
+	u64 pb_get;
 };

 struct gk20a_event_id_data {
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -4212,6 +4212,16 @@ u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
 		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
 }

+u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c)
+{	/* Builds a u64 from two 32-bit gk20a_bar1_readl() reads based at c->userd_gpu_va. */
+	u32 lo = gk20a_bar1_readl(g,	/* word at index ram_userd_get_w(): low 32 bits */
+		c->userd_gpu_va + sizeof(u32) * ram_userd_get_w());
+	u32 hi = gk20a_bar1_readl(g,	/* word at index ram_userd_get_hi_w(): high 32 bits */
+		c->userd_gpu_va + sizeof(u32) * ram_userd_get_hi_w());
+
+	return ((u64)hi << 32) | lo;	/* NOTE(review): lo/hi are separate reads; confirm a torn value is acceptable to callers */
+}
+
 void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
 {
 	gk20a_bar1_writel(g,
@@ -4297,6 +4307,7 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.setup_userd = gk20a_fifo_setup_userd;
 	gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get;
 	gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put;
+	gops->fifo.userd_pb_get = gk20a_fifo_userd_pb_get;
 	gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val;
 	gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg;
 	gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error;
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -343,6 +343,7 @@ void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,

 u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c);
 void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c);
+u64 gk20a_fifo_userd_pb_get(struct gk20a *g, struct channel_gk20a *c);

 bool gk20a_is_fault_engine_subid_gpc(struct gk20a *g, u32 engine_subid);
 #ifdef CONFIG_DEBUG_FS
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -471,6 +471,7 @@ struct gpu_ops {
 					u32 *runlist);
 		u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
 		void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
+		u64 (*userd_pb_get)(struct gk20a *g, struct channel_gk20a *ch);
 		void (*free_channel_ctx_header)(struct channel_gk20a *ch);
 		bool (*is_fault_engine_subid_gpc)(struct gk20a *g,
 					 u32 engine_subid);
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -199,6 +199,7 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.setup_userd = gk20a_fifo_setup_userd;
 	gops->fifo.userd_gp_get = gk20a_fifo_userd_gp_get;
 	gops->fifo.userd_gp_put = gk20a_fifo_userd_gp_put;
+	gops->fifo.userd_pb_get = gk20a_fifo_userd_pb_get;
 	gops->fifo.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val;

 	gops->fifo.preempt_channel = gk20a_fifo_preempt_channel;