From 9a16bc3fd4aa1cc3023c48713497fdb4df2fa8d1 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Wed, 29 Jan 2020 14:02:57 -0500
Subject: [PATCH] gpu: nvgpu: wait ACK for FECS watchdog timeout

From gv11b onwards, FECS ucode returns an ACK for the set watchdog
timeout method. Failure to wait for this ACK was leading to races,
and in some cases, the ACK could be mistaken for the reply to the
next method.

In particular, this happened for the discover golden image size
method which is sent after set watchdog timeout.

With instrumented FECS ucode, it takes longer for the code to
process the set watchdog timeout method, and the write to ack
that method could happen after the nvgpu driver clears the mailbox
to send the discover image size method.

With an invalid golden context image size, FECS ended up causing
an MMU fault while attempting to save past the allocated buffer.

Added NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT to be used with
gops_gr_falcon.ctrl_ctxsw, and implemented 2 variants:
- gm20b_gr_falcon_ctrl_ctxsw, without ACK
- gv11b_gr_falcon_ctrl_ctxsw, with ACK

Added the NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED flag to allow
executing the above method without re-acquiring the FECS lock. Longer
term, 'flags' could be added to gops_gr_falcon.ctrl_ctxsw parameters.

Use gops_gr_falcon.ctrl_ctxsw instead of register writes to invoke
set watchdog timeout method in gm20b_gr_falcon_wait_ctxsw_ready.

Also replaced calls to gm20b_gr_falcon_ctrl_ctxsw with
gops_gr_falcon.ctrl_ctxsw when appropriate, since there are
multiple variants (gm20b, gp10b and gv11b).

Last, fixed clearing of mailbox 0 in gm20b_gr_falcon_bind_instblk.

Bug 200586923

Change-Id: I653b9a216555eec8cd4bb01d6f202bc77b75a939
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2287340
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 .../gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h |  4 +-
 .../hal/gr/falcon/gr_falcon_gm20b_fusa.c      | 59 ++++++++++++-------
 .../hal/gr/falcon/gr_falcon_gp10b_fusa.c      |  2 +-
 .../gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b.h |  4 +-
 .../hal/gr/falcon/gr_falcon_gv11b_fusa.c      | 42 ++++++++++++-
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |  2 +-
 .../gpu/nvgpu/include/nvgpu/gr/gr_falcon.h    |  9 +++
 7 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h
index 2a8c0801f..35c0fb337 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -47,7 +47,7 @@ void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
 int gm20b_gr_falcon_wait_mem_scrubbing(struct gk20a *g);
 int gm20b_gr_falcon_wait_ctxsw_ready(struct gk20a *g);
 int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
-	struct nvgpu_fecs_method_op op, bool sleepduringwait);
+	struct nvgpu_fecs_method_op op, u32 flags);
 int gm20b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
 						u32 data, u32 *ret_val);
 void gm20b_gr_falcon_set_current_ctx_invalid(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c
index a2c8ee771..de4475e97 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b_fusa.c
@@ -127,7 +127,7 @@ void gm20b_gr_falcon_bind_instblk(struct gk20a *g,
 			FECS_ARB_CMD_TIMEOUT_DEFAULT_US;
 	u32 inst_ptr_u32;
 
-	nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0);
+	nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), U32_MAX);
 
 	while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) &
 			gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) &&
@@ -520,11 +520,12 @@ int gm20b_gr_falcon_wait_ctxsw_ready(struct gk20a *g)
 	}
 #endif
 
-	nvgpu_log_info(g, "configuring ctxsw_ucode wdt = 0x%x", wdt_val);
-	nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), U32_MAX);
-	nvgpu_writel(g, gr_fecs_method_data_r(), wdt_val);
-	nvgpu_writel(g, gr_fecs_method_push_r(),
-		     gr_fecs_method_push_adr_set_watchdog_timeout_f());
+	ret = g->ops.gr.falcon.ctrl_ctxsw(g,
+		NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT, wdt_val, NULL);
+	if (ret != 0) {
+		nvgpu_err(g, "fail to set watchdog timeout");
+		return ret;
+	}
 
 	nvgpu_log_fn(g, "done");
 	return 0;
@@ -537,7 +538,7 @@ int gm20b_gr_falcon_init_ctx_state(struct gk20a *g,
 
 	nvgpu_log_fn(g, " ");
 
-	ret = gm20b_gr_falcon_ctrl_ctxsw(g,
+	ret = g->ops.gr.falcon.ctrl_ctxsw(g,
 		NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_IMAGE_SIZE,
 		0, &sizes->golden_image_size);
 	if (ret != 0) {
@@ -548,7 +549,7 @@ int gm20b_gr_falcon_init_ctx_state(struct gk20a *g,
 
 #if defined(CONFIG_NVGPU_DEBUGGER) || \
 defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
-	ret = gm20b_gr_falcon_ctrl_ctxsw(g,
+	ret = g->ops.gr.falcon.ctrl_ctxsw(g,
 		NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_PM_IMAGE_SIZE,
 #ifndef CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING
 		0, &sizes->pm_ctxsw_image_size);
@@ -565,7 +566,7 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 #endif
 
 #ifdef CONFIG_NVGPU_GRAPHICS
-	ret = gm20b_gr_falcon_ctrl_ctxsw(g,
+	ret = g->ops.gr.falcon.ctrl_ctxsw(g,
 		NVGPU_GR_FALCON_METHOD_CTXSW_DISCOVER_ZCULL_IMAGE_SIZE,
 		0, &sizes->zcull_image_size);
 	if (ret != 0) {
@@ -619,17 +620,21 @@ void gm20b_gr_falcon_set_current_ctx_invalid(struct gk20a *g)
 		gr_fecs_current_ctx_valid_false_f());
 }
 
-/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
+/*
+ * The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
  * We should replace most, if not all, fecs method calls to this instead.
  */
 int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
-				   struct nvgpu_fecs_method_op op,
-				   bool sleepduringwait)
+		struct nvgpu_fecs_method_op op, u32 flags)
 {
 	int ret;
 	struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g);
+	bool sleepduringwait =
+			(flags & NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP) != 0U;
 
-	nvgpu_mutex_acquire(&gr_falcon->fecs_mutex);
+	if ((flags & NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED) == 0U) {
+		nvgpu_mutex_acquire(&gr_falcon->fecs_mutex);
+	}
 
 	if (op.mailbox.id != 0U) {
 		nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
@@ -659,7 +664,9 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 			op.method.data, op.method.addr);
 	}
 
-	nvgpu_mutex_release(&gr_falcon->fecs_mutex);
+	if ((flags & NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED) == 0U) {
+		nvgpu_mutex_release(&gr_falcon->fecs_mutex);
+	}
 
 	return ret;
 }
@@ -673,8 +680,8 @@ int gm20b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
 		.method.data = 0U,
 		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
 		.cond.fail = GR_IS_UCODE_OP_SKIP,
-		};
-	bool sleepduringwait = false;
+	};
+	u32 flags = 0U;
 
 	nvgpu_log_info(g, "fecs method %d data 0x%x ret_value %p",
 						fecs_method, data, ret_val);
@@ -689,7 +696,7 @@ int gm20b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
 		op.mailbox.fail = gr_fecs_ctxsw_mailbox_value_fail_v();
 		op.cond.ok = GR_IS_UCODE_OP_EQUAL;
 		op.cond.fail = GR_IS_UCODE_OP_EQUAL;
-		sleepduringwait = true;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP;
 	break;
 
 	case NVGPU_GR_FALCON_METHOD_CTXSW_START:
@@ -700,7 +707,7 @@ int gm20b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
 		op.mailbox.fail = gr_fecs_ctxsw_mailbox_value_fail_v();
 		op.cond.ok = GR_IS_UCODE_OP_EQUAL;
 		op.cond.fail = GR_IS_UCODE_OP_EQUAL;
-		sleepduringwait = true;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP;
 	break;
 #endif
 #ifdef CONFIG_NVGPU_ENGINE_RESET
@@ -733,7 +740,7 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 		op.method.addr =
 			gr_fecs_method_push_adr_discover_pm_image_size_v();
 		op.mailbox.ret = ret_val;
-		sleepduringwait = true;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP;
 		break;
 #endif
 #ifdef CONFIG_NVGPU_POWER_PG
@@ -770,7 +777,7 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 		op.mailbox.fail = 0x20U;
 		op.cond.ok = GR_IS_UCODE_OP_AND;
 		op.cond.fail = GR_IS_UCODE_OP_AND;
-		sleepduringwait = true;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP;
 		break;
 
 	case NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE:
@@ -781,7 +788,7 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 		op.mailbox.fail = 0x2U;
 		op.cond.ok = GR_IS_UCODE_OP_AND;
 		op.cond.fail = GR_IS_UCODE_OP_AND;
-		sleepduringwait = true;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP;
 		break;
 #ifdef CONFIG_NVGPU_FECS_TRACE
 	case NVGPU_GR_FALCON_METHOD_FECS_TRACE_FLUSH:
@@ -789,12 +796,20 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 			gr_fecs_method_push_adr_write_timestamp_record_v();
 		break;
 #endif
+	case NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT:
+		op.method.addr =
+			gr_fecs_method_push_adr_set_watchdog_timeout_f();
+		op.method.data = data;
+		op.cond.ok = GR_IS_UCODE_OP_SKIP;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED;
+		break;
 
 	default:
 		nvgpu_err(g, "unsupported fecs mode %d", fecs_method);
 		break;
 	}
-	return gm20b_gr_falcon_submit_fecs_method_op(g, op, sleepduringwait);
+
+	return gm20b_gr_falcon_submit_fecs_method_op(g, op, flags);
 }
 
 u32 gm20b_gr_falcon_get_current_ctx(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b_fusa.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b_fusa.c
index a74aa85c8..46d76d0d2 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gp10b_fusa.c
@@ -80,7 +80,7 @@ int gp10b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
 		op.method.addr =
 			gr_fecs_method_push_adr_discover_preemption_image_size_v();
 		op.mailbox.ret = ret_val;
-		ret = gm20b_gr_falcon_submit_fecs_method_op(g, op, false);
+		ret = gm20b_gr_falcon_submit_fecs_method_op(g, op, 0U);
 	break;
 
 	case NVGPU_GR_FALCON_METHOD_CONFIGURE_CTXSW_INTR:
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b.h b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b.h
index 3f8e6359d..0efe4bc19 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -29,5 +29,7 @@ struct nvgpu_fecs_ecc_status;
 void gv11b_gr_falcon_handle_fecs_ecc_error(struct gk20a *g,
 			struct nvgpu_fecs_ecc_status *fecs_ecc_status);
 void gv11b_gr_falcon_fecs_host_int_enable(struct gk20a *g);
+int gv11b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
+			u32 data, u32 *ret_val);
 
 #endif /* NVGPU_GR_FALCON_GV11B_H */
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b_fusa.c
index d3e10eeba..8b4cb6d24 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gv11b_fusa.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2020, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,7 +24,10 @@
 #include <nvgpu/io.h>
 #include <nvgpu/gr/gr_falcon.h>
 
+#include "gr_falcon_gp10b.h"
+#include "gr_falcon_gm20b.h"
 #include "gr_falcon_gv11b.h"
+#include "common/gr/gr_falcon_priv.h"
 
 #include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
 
@@ -117,6 +120,57 @@ void gv11b_gr_falcon_handle_fecs_ecc_error(struct gk20a *g,
 	}
 }
 
+/*
+ * gv11b variant of the ctrl_ctxsw HAL.
+ *
+ * NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT is submitted here with @data as
+ * method data. The default ok-condition (GR_IS_UCODE_OP_NOT_EQUAL on mailbox
+ * 0) is kept so the FECS ACK is waited for, except when CONFIG_NVGPU_SIM is
+ * defined, where the ok-condition is skipped. The method is submitted with
+ * NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED, i.e. the FECS mutex is not taken
+ * by the submit helper and is expected to be held by the caller.
+ *
+ * Every other method is forwarded to gp10b_gr_falcon_ctrl_ctxsw().
+ *
+ * Returns the status of the submit or of the forwarded call.
+ */
+int gv11b_gr_falcon_ctrl_ctxsw(struct gk20a *g, u32 fecs_method,
+		u32 data, u32 *ret_val)
+{
+	struct nvgpu_fecs_method_op op = {
+		.mailbox = { .id = 0U, .data = 0U, .ret = NULL,
+			     .clr = ~U32(0U), .ok = 0U, .fail = 0U},
+		.method.data = 0U,
+		.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
+		.cond.fail = GR_IS_UCODE_OP_SKIP,
+	};
+	u32 flags = 0U;
+	int ret;
+
+	nvgpu_log_info(g, "fecs method %d data 0x%x ret_val %p",
+				fecs_method, data, ret_val);
+
+	switch (fecs_method) {
+	case NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT:
+		op.method.addr =
+			gr_fecs_method_push_adr_set_watchdog_timeout_f();
+		op.method.data = data;
+		flags |= NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED;
+#ifdef CONFIG_NVGPU_SIM
+		op.cond.ok = GR_IS_UCODE_OP_SKIP;
+#endif
+
+		ret = gm20b_gr_falcon_submit_fecs_method_op(g, op, flags);
+		break;
+
+	default:
+		ret = gp10b_gr_falcon_ctrl_ctxsw(g, fecs_method,
+				data, ret_val);
+		break;
+	}
+	return ret;
+}
+
 void gv11b_gr_falcon_fecs_host_int_enable(struct gk20a *g)
 {
 	nvgpu_writel(g, gr_fecs_host_int_enable_r(),
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index 55227ed91..743803c99 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -766,7 +766,7 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
 			.wait_mem_scrubbing =
 					gm20b_gr_falcon_wait_mem_scrubbing,
 			.wait_ctxsw_ready = gm20b_gr_falcon_wait_ctxsw_ready,
-			.ctrl_ctxsw = gp10b_gr_falcon_ctrl_ctxsw,
+			.ctrl_ctxsw = gv11b_gr_falcon_ctrl_ctxsw,
 			.get_current_ctx = gm20b_gr_falcon_get_current_ctx,
 			.get_ctx_ptr = gm20b_gr_falcon_get_ctx_ptr,
 			.get_fecs_current_ctx_data =
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
index 4f0f3cf8e..8838c4ccc 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
@@ -73,6 +73,15 @@ defined(CONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING)
 #define NVGPU_GR_FALCON_METHOD_CONFIGURE_CTXSW_INTR		13
 #endif
 
+/** Falcon method to set watchdog timeout. */
+#define NVGPU_GR_FALCON_METHOD_SET_WATCHDOG_TIMEOUT		14
+
+/** Sleep while waiting for Falcon ACK */
+#define NVGPU_GR_FALCON_SUBMIT_METHOD_F_SLEEP		BIT32(0)
+
+/** Falcon lock already held */
+#define NVGPU_GR_FALCON_SUBMIT_METHOD_F_LOCKED		BIT32(1)
+
 /** Falcon index of mailbox 0. */
 #define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0 0U
 /** Falcon index of mailbox 1. */