From 6609a05683a67c21e859d07af40722fdc1fc8212 Mon Sep 17 00:00:00 2001
From: mkumbar <mkumbar@nvidia.com>
Date: Mon, 21 Feb 2022 19:25:07 +0530
Subject: [PATCH] gpu: nvgpu: Move ACR WPR init region cmd to LSFM

Move the ACR WPR init region cmd from the ISR to LSFM, as part of the
LSF bootstrap request, so that ACR commands execute sequentially and
the call blocks by polling is_wpr_init_done until it is set to true.
A delay also had to be added after each ACR command for ga10b LSPMU,
because the nvriscv priv lockdown for ACR commands happens
asynchronously from nvgpu, as detailed below.

LSPMU engages priv lockdown whenever an ACR command needs to be
processed. Once a command is sent to the PMU, nvgpu polls the
interrupt status via the pwr_falcon_irqstat_r register, in order to
process the ACK message from LSPMU, as long as priv lockdown is not
engaged. During NVRISCV priv lockdown a couple of registers,
including the irqstat register, are not accessible. LSPMU engages
priv lockdown upon receiving an ACR command, and this is asynchronous
to nvgpu. As a result, in corner cases nvgpu's irqstat read returns
0xbadf* during polling even though a priv lockdown check is present,
and the irqstat register value is interpreted wrongly.

Add a delay after an ACR command is sent to LSPMU and before nvgpu
polls the irqstat register, to give LSPMU time to engage priv
lockdown: 5ms after a bootstrap command (LSPMU takes ~3.5msec to
process it) and 2ms after the init region command (LSPMU takes
~350usec to process it). This additional delay helps to avoid reading
irqstat in the corner case during the priv lockdown process.

Bug 3464141
Bug 3482947

Change-Id: I494493a92f6ede5dcb876aeb0d76d54969f0f59e
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2673246
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c    |  5 --
 drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c      | 74 ++++++++++++++-----
 .../gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c | 16 ----
 .../gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c | 15 ----
 .../gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c | 52 ++++++-------
 drivers/gpu/nvgpu/include/nvgpu/pmu/lsfm.h    |  4 +-
 6 files changed, 85 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c
index 6e5b18830..ee0f83be9 100644
--- a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c
+++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c
@@ -530,11 +530,6 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
 			}
 		}
 
-		err = nvgpu_pmu_lsfm_int_wpr_region(g, pmu, pmu->lsfm);
-		if (err != 0) {
-			return err;
-		}
-
 		return 0;
 	}
 
diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c
index 032bda643..ac3daba08 100644
--- a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c
+++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c
@@ -55,16 +55,37 @@ static bool is_lsfm_supported(struct gk20a *g,
 	return false;
 }
 
-int nvgpu_pmu_lsfm_int_wpr_region(struct gk20a *g,
+static int lsfm_int_wpr_region(struct gk20a *g,
 	struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm)
 {
-	if (is_lsfm_supported(g, pmu, lsfm)) {
-		if (lsfm->init_wpr_region != NULL) {
-			return lsfm->init_wpr_region(g, pmu);
+	int status = 0;
+
+	status = nvgpu_pmu_wait_fw_ready(g, g->pmu);
+	if (status != 0) {
+		nvgpu_err(g, "PMU not ready to process requests");
+		goto done;
+	}
+
+	if (lsfm->init_wpr_region != NULL) {
+		status = lsfm->init_wpr_region(g, pmu);
+	} else {
+		status = -EINVAL;
+		goto done;
+	}
+
+	if (status == 0) {
+		pmu_wait_message_cond(g->pmu,
+				nvgpu_get_poll_timeout(g),
+				&lsfm->is_wpr_init_done, 1U);
+		/* check again; if it is still not ready, indicate an error */
+		if (!lsfm->is_wpr_init_done) {
+			nvgpu_err(g, "PMU not ready to load LSF");
+			status = -ETIMEDOUT;
 		}
 	}
 
-	return 0;
+done:
+	return status;
 }
 
 int nvgpu_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
@@ -72,22 +93,41 @@ int nvgpu_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 {
 	int status = 0;
 
-	if (is_lsfm_supported(g, pmu, lsfm)) {
-		if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
-			if (lsfm->bootstrap_ls_falcon != NULL) {
-				status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm,
-					falcon_id_mask);
-			}
-		} else {
-			status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm, FALCON_ID_FECS);
-			if (status != 0) {
-				return status;
-			}
+	if (!is_lsfm_supported(g, pmu, lsfm)) {
+		return 0;
+	}
 
-			status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm, FALCON_ID_GPCCS);
+	/*
+	 * check whether pmu is ready to bootstrap lsf, if not send
+	 * the init WPR region command and wait for completion.
+	 */
+	if (!lsfm->is_wpr_init_done) {
+		status = lsfm_int_wpr_region(g, pmu, lsfm);
+		if (status != 0) {
+			nvgpu_err(g, "LSF init WPR region failed");
+			goto done;
 		}
 	}
 
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+		if (lsfm->bootstrap_ls_falcon != NULL) {
+			status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm,
+				falcon_id_mask);
+		}
+	} else {
+		status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm, FALCON_ID_FECS);
+		if (status != 0) {
+			goto done;
+		}
+
+		status = lsfm->bootstrap_ls_falcon(g, pmu, lsfm, FALCON_ID_GPCCS);
+	}
+
+done:
+	if (status != 0) {
+			nvgpu_err(g, "LSF Load failed");
+	}
+
 	return status;
 }
 
diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c
index 64c259173..2446aaf8b 100644
--- a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c
+++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gm20b.c
@@ -102,10 +102,6 @@ static int gm20b_pmu_lsfm_bootstrap_falcon(struct gk20a *g,
 
 	lsfm->loaded_falcon_id = 0U;
 
-	if (!lsfm->is_wpr_init_done) {
-		return -EINVAL;
-	}
-
 	/* send message to load FECS falcon */
 	(void) memset(&cmd, 0, sizeof(struct pmu_cmd));
 	cmd.hdr.unit_id = PMU_UNIT_ACR;
@@ -137,18 +133,6 @@ static int gm20b_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 		return -EINVAL;
 	}
 
-	/* check whether pmu is ready to bootstrap lsf if not wait for it */
-	if (!lsfm->is_wpr_init_done) {
-		pmu_wait_message_cond(g->pmu,
-				nvgpu_get_poll_timeout(g),
-				&lsfm->is_wpr_init_done, 1U);
-		/* check again if it still not ready indicate an error */
-		if (!lsfm->is_wpr_init_done) {
-			nvgpu_err(g, "PMU not ready to load LSF");
-			return -ETIMEDOUT;
-		}
-	}
-
 	/* load FECS */
 	nvgpu_falcon_mailbox_write(&g->fecs_flcn, FALCON_MAILBOX_0, ~U32(0x0U));
 
diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c
index 0a1bd1944..732cbfb4f 100644
--- a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c
+++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gp10b.c
@@ -43,9 +43,6 @@ static int gp10b_pmu_lsfm_bootstrap_falcon(struct gk20a *g,
 	lsfm->loaded_falcon_id = 0U;
 
 	nvgpu_pmu_dbg(g, "wprinit status = %x", lsfm->is_wpr_init_done);
-	if (!lsfm->is_wpr_init_done) {
-		return -EINVAL;
-	}
 
 	/* send message to load FECS falcon */
 	(void) memset(&cmd, 0, sizeof(struct pmu_cmd));
@@ -90,18 +87,6 @@ static int gp10b_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 	}
 
 	lsfm->loaded_falcon_id = 0U;
-	/* check whether pmu is ready to bootstrap lsf if not wait for it */
-	if (!lsfm->is_wpr_init_done) {
-		pmu_wait_message_cond(g->pmu,
-				nvgpu_get_poll_timeout(g),
-				&lsfm->is_wpr_init_done, 1U);
-		/* check again if it still not ready indicate an error */
-		if (!lsfm->is_wpr_init_done) {
-			nvgpu_err(g, "PMU not ready to load LSF");
-			err = -ETIMEDOUT;
-			goto done;
-		}
-	}
 
 	/* bootstrap falcon(s) */
 	err = gp10b_pmu_lsfm_bootstrap_falcon(g, pmu, lsfm,
diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c
index 18424c28c..2d6334688 100644
--- a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c
+++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm_sw_gv100.c
@@ -54,6 +54,15 @@ static int gv100_pmu_lsfm_init_acr_wpr_region(struct gk20a *g,
 			status);
 	}
 
+	/*
+	 * Add a delay of 2ms after the init region command is sent to LSPMU
+	 * (LSPMU takes ~350usec to process the command) and before nvgpu
+	 * polls the irqstat register, to let LSPMU engage priv lockdown.
+	 * This additional delay helps to avoid reading the irqstat
+	 * incorrectly in the corner case during the priv lockdown process.
+	 */
+	nvgpu_msleep(2);
+
 	return status;
 }
 
@@ -75,18 +84,6 @@ static int gv100_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 	}
 
 	lsfm->loaded_falcon_id = 0U;
-	/* check whether pmu is ready to bootstrap lsf if not wait for it */
-	if (!lsfm->is_wpr_init_done) {
-		pmu_wait_message_cond(g->pmu,
-			nvgpu_get_poll_timeout(g),
-			&lsfm->is_wpr_init_done, 1U);
-		/* check again if it still not ready indicate an error */
-		if (!lsfm->is_wpr_init_done) {
-			nvgpu_err(g, "PMU not ready to load LSF");
-			status = -ETIMEDOUT;
-			goto exit;
-		}
-	}
 
 	(void) memset(&rpc, 0,
 		sizeof(struct nv_pmu_rpc_struct_acr_bootstrap_gr_falcons));
@@ -99,6 +96,15 @@ static int gv100_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 		goto exit;
 	}
 
+	/*
+	 * Add a delay of 5ms after the bootstrap command is sent to LSPMU
+	 * (LSPMU takes ~3.5msec to process the command) and before nvgpu
+	 * polls the irqstat register, to let LSPMU engage priv lockdown.
+	 * This additional delay helps to avoid reading the irqstat
+	 * incorrectly in the corner case during the priv lockdown process.
+	 */
+	nvgpu_msleep(5);
+
 	pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g),
 		&lsfm->loaded_falcon_id, 1U);
 
@@ -139,19 +145,6 @@ static int gv100_pmu_lsfm_bootstrap_ls_falcon_eng(struct gk20a *g,
 
 	lsfm->loaded_falcon_id = 0U;
 
-	/* check whether pmu is ready to bootstrap lsf if not wait for it */
-	if (!lsfm->is_wpr_init_done) {
-		pmu_wait_message_cond(g->pmu,
-			nvgpu_get_poll_timeout(g),
-			&lsfm->is_wpr_init_done, 1U);
-		/* check again if it still not ready indicate an error */
-		if (!lsfm->is_wpr_init_done) {
-			nvgpu_err(g, "PMU not ready to load LSF");
-			status = -ETIMEDOUT;
-			goto exit;
-		}
-	}
-
 	(void) memset(&rpc, 0,
 		sizeof(struct nv_pmu_rpc_struct_acr_bootstrap_falcon));
 
@@ -173,6 +166,15 @@ static int gv100_pmu_lsfm_bootstrap_ls_falcon_eng(struct gk20a *g,
 		goto exit;
 	}
 
+	/*
+	 * Add a delay of 5ms after the bootstrap command is sent to LSPMU
+	 * (LSPMU takes ~3.5msec to process the command) and before nvgpu
+	 * polls the irqstat register, to let LSPMU engage priv lockdown.
+	 * This additional delay helps to avoid reading the irqstat
+	 * incorrectly in the corner case during the priv lockdown process.
+	 */
+	nvgpu_msleep(5);
+
 	pmu_wait_message_cond(g->pmu, nvgpu_get_poll_timeout(g),
 		&lsfm->loaded_falcon_id, 1U);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/lsfm.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/lsfm.h
index a13aed734..b781cc49b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu/lsfm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/lsfm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -36,8 +36,6 @@ struct nvgpu_pmu_lsfm {
 	int (*ls_pmu_cmdline_args_copy)(struct gk20a *g, struct nvgpu_pmu *pmu);
 };
 
-int nvgpu_pmu_lsfm_int_wpr_region(struct gk20a *g,
-	struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm);
 int nvgpu_pmu_lsfm_bootstrap_ls_falcon(struct gk20a *g,
 	struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm, u32 falcon_id_mask);
 int nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(struct gk20a *g,