From 6cbc174fc25187092ea02830b37b282b772fda20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?= <kholtta@nvidia.com>
Date: Thu, 28 May 2020 09:53:36 +0300
Subject: [PATCH] gpu: nvgpu: avoid channel wdt ifdefs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement empty stubs of the channel watchdog functions for when the
watchdog is disabled in the build. Add some forward declarations that
were missing. Now most call sites don't need #ifdefs for the build flag.

Add error checks for the wdt alloc failure.

Jira NVGPU-5494
Jira NVGPU-5493

Change-Id: I2d42e8ab4c5e045cd280b2e1f254396127bd154b
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2352050
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/ce/ce_app.c       |  2 --
 drivers/gpu/nvgpu/common/fifo/channel.c    | 16 +++------
 drivers/gpu/nvgpu/common/fifo/submit.c     | 12 ++-----
 drivers/gpu/nvgpu/common/rc/rc.c           |  2 --
 drivers/gpu/nvgpu/include/nvgpu/channel.h  |  5 ++-
 drivers/gpu/nvgpu/include/nvgpu/watchdog.h | 40 +++++++++++++++++++++-
 drivers/gpu/nvgpu/os/linux/cde.c           |  2 --
 7 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/ce/ce_app.c b/drivers/gpu/nvgpu/common/ce/ce_app.c
index 5897e605b..a5ecf1087 100644
--- a/drivers/gpu/nvgpu/common/ce/ce_app.c
+++ b/drivers/gpu/nvgpu/common/ce/ce_app.c
@@ -535,9 +535,7 @@ u32 nvgpu_ce_app_create_context(struct gk20a *g,
 		goto end;
 	}
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	nvgpu_channel_wdt_disable(ce_ctx->ch->wdt);
-#endif
 
 	/* bind the channel to the vm */
 	err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index d17174ee1..d8be6fe2a 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -640,9 +640,7 @@ int nvgpu_channel_add_job(struct nvgpu_channel *c,
 		job->num_mapped_buffers = num_mapped_buffers;
 		job->mapped_buffers = mapped_buffers;
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 		nvgpu_channel_wdt_start(c->wdt, c);
-#endif
 
 		if (!pre_alloc_enabled) {
 			nvgpu_channel_joblist_lock(c);
@@ -689,9 +687,7 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 	struct nvgpu_channel_job *job;
 	struct gk20a *g;
 	bool job_finished = false;
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	bool watchdog_on = false;
-#endif
 
 	if (nvgpu_is_powered_off(c->g)) { /* shutdown case */
 		return;
@@ -700,7 +696,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 	vm = c->vm;
 	g = c->g;
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	/*
 	 * If !clean_all, we're in a condition where watchdog isn't supported
 	 * anyway (this would be a no-op).
@@ -708,7 +703,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 	if (clean_all) {
 		watchdog_on = nvgpu_channel_wdt_stop(c->wdt);
 	}
-#endif
 
 	/* Synchronize with abort cleanup that needs the jobs. */
 	nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
@@ -737,7 +731,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 
 		completed = nvgpu_fence_is_expired(job->post_fence);
 		if (!completed) {
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 			/*
 			 * The watchdog eventually sees an updated gp_get if
 			 * something happened in this loop. A new job can have
@@ -748,7 +741,6 @@ void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 			if (clean_all && watchdog_on) {
 				nvgpu_channel_wdt_continue(c->wdt);
 			}
-#endif
 			break;
 		}
 
@@ -1202,10 +1194,8 @@ unbind:
 	g->ops.channel.unbind(ch);
 	g->ops.channel.free_inst(g, ch);
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	nvgpu_channel_wdt_destroy(ch->wdt);
 	ch->wdt = NULL;
-#endif
 
 #ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
 	channel_free_put_deterministic_ref_from_init(ch);
@@ -1465,6 +1455,10 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
 
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
 	ch->wdt = nvgpu_channel_wdt_alloc(ch);
+	if (ch->wdt == NULL) {
+		nvgpu_err(g, "wdt alloc failed");
+		goto clean_up;
+	}
 #endif
 
 	ch->obj_class = 0;
@@ -1514,12 +1508,10 @@ static int channel_setup_ramfc(struct nvgpu_channel *c,
 	u64 pbdma_acquire_timeout = 0ULL;
 	struct gk20a *g = c->g;
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	if (nvgpu_channel_wdt_enabled(c->wdt) &&
 			nvgpu_is_timeouts_enabled(c->g)) {
 		pbdma_acquire_timeout = nvgpu_channel_wdt_limit(c->wdt);
 	}
-#endif
 
 	err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size,
 			pbdma_acquire_timeout, args->flags);
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 3000acdbb..9e288937e 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -550,12 +550,10 @@ static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
 		return -EINVAL;
 	}
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	/* the watchdog needs periodic job cleanup */
 	if (nvgpu_channel_wdt_enabled(c->wdt)) {
 		return -EINVAL;
 	}
-#endif
 
 	/*
 	 * Job tracking is necessary on deterministic channels if and only if
@@ -660,16 +658,12 @@ static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
 	 * required and a fast submit can be done (ie. only need to write
 	 * out userspace GPFIFO entries and update GP_PUT).
 	 */
-	need_job_tracking = (flag_fence_wait ||
+	need_job_tracking = flag_fence_wait ||
 			flag_fence_get ||
 			nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
 			nvgpu_is_vpr_resize_enabled() ||
-			!skip_buffer_refcounting);
-
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
-       need_job_tracking = need_job_tracking ||
-	       nvgpu_channel_wdt_enabled(c->wdt);
-#endif
+			!skip_buffer_refcounting ||
+			nvgpu_channel_wdt_enabled(c->wdt);
 
 	if (need_job_tracking) {
 		/*
diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c
index 5770412a2..2c07c2ec0 100644
--- a/drivers/gpu/nvgpu/common/rc/rc.c
+++ b/drivers/gpu/nvgpu/common/rc/rc.c
@@ -70,13 +70,11 @@ void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask,
 		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 
 #ifdef CONFIG_NVGPU_RECOVERY
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	/*
 	 * Cancel all channels' wdt since ctxsw timeout might
 	 * trigger multiple watchdogs at a time
 	 */
 	nvgpu_channel_wdt_restart_all_channels(g);
-#endif
 
 	nvgpu_rc_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true, debug_dump,
 			RC_TYPE_CTXSW_TIMEOUT);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index a160a4035..7c2a39cc6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -48,6 +48,7 @@ struct nvgpu_gr_ctx;
 struct nvgpu_debug_context;
 struct priv_cmd_queue;
 struct priv_cmd_entry;
+struct nvgpu_channel_wdt;
 
 /**
  * S/W defined invalid channel identifier.
@@ -377,12 +378,10 @@ struct nvgpu_channel {
 	struct nvgpu_channel_sync *sync;
 	/* for job cleanup handling in the background worker */
 	struct nvgpu_list_node worker_item;
+#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	/* kernel watchdog to kill stuck jobs */
 	struct nvgpu_channel_wdt *wdt;
-#endif /* CONFIG_NVGPU_CHANNEL_WDT */
-#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
 
 	/** Fence allocator in case of deterministic submit. */
 	struct nvgpu_allocator fence_allocator;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/watchdog.h b/drivers/gpu/nvgpu/include/nvgpu/watchdog.h
index ff002d5d3..4f2c8e78a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/watchdog.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/watchdog.h
@@ -28,6 +28,7 @@
 struct gk20a;
 struct nvgpu_channel;
 struct nvgpu_worker;
+struct nvgpu_channel_wdt;
 
 struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct nvgpu_channel *ch);
 void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt);
@@ -49,6 +50,43 @@ void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
 
 void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
 
-#endif
+#else /* CONFIG_NVGPU_CHANNEL_WDT */
+
+static inline struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(
+		struct nvgpu_channel *ch)
+{
+	return NULL;
+}
+static inline void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt) {}
+static inline void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt) {}
+static inline void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt) {}
+static inline bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt)
+{
+	return false;
+}
+
+static inline void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt,
+		u32 limit_ms) {}
+static inline u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
+{
+	return 0U;
+}
+static inline void nvgpu_channel_wdt_set_debug_dump(
+		struct nvgpu_channel_wdt *wdt,
+		bool dump) {}
+
+static inline void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel *ch) {}
+static inline void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt) {}
+static inline bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
+{
+	return false;
+}
+static inline void nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
+		struct nvgpu_channel *ch) {}
+
+static inline void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g) {}
+
+#endif /* CONFIG_NVGPU_CHANNEL_WDT */
 
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index c757d83f1..4b1f5bba9 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -1342,9 +1342,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 		goto err_get_gk20a_channel;
 	}
 
-#ifdef CONFIG_NVGPU_CHANNEL_WDT
 	nvgpu_channel_wdt_disable(ch->wdt);
-#endif
 
 	/* bind the channel to the vm */
 	err = g->ops.mm.vm_bind_channel(g->mm.cde.vm, ch);