gpu: nvgpu: free rpc_payload when driver is dying

- During nvgpu module unload, the poweroff sequence does not
  wait for the ACK from the PMU for the RPCs that were sent.
- As a result, stale rpc_payload struct info remains attached to
  the pmu sequence struct.
- This can lead to memory corruption during the unload path.
- To avoid this, return a distinct value from the FW ACK wait
  function for the driver-shutdown scenario, and based on this
  return value free the RPC payload and release the corresponding
  pmu sequence struct (see the sketch below).
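
The resulting handling in nvgpu_pmu_rpc_execute() is roughly the following
(illustrative sketch only; it condenses the diff below, and surrounding
declarations and the cleanup label are assumed from that context):

	/* Sketch: distinguish the FW ACK outcomes when waiting for an RPC ACK. */
	fw_ack_status = nvgpu_pmu_wait_fw_ack_status(g, pmu,
			nvgpu_get_poll_timeout(g),
			&rpc_payload->complete, 1U);
	if (fw_ack_status == -ETIMEDOUT) {
		/* PMU never acknowledged: report the timeout and clean up. */
		nvgpu_err(g, "PMU wait timeout expired.");
		status = -ETIMEDOUT;
		goto cleanup;
	} else if (fw_ack_status == PMU_FW_ACK_DRIVER_SHUTDOWN) {
		/*
		 * Driver is shutting down, so no ACK will arrive: free the
		 * RPC payload now and release the PMU sequence so no stale
		 * reference is left behind in the sequence struct.
		 */
		nvgpu_kfree(g, rpc_payload);
		seq = nvgpu_pmu_sequences_get_seq(pmu->sequences, cmd.hdr.seq_id);
		nvgpu_pmu_seq_release(g, pmu->sequences, seq);
	} else {
		/* Normal path (PMU_FW_ACK_RECEIVED): copy results back and free. */
		nvgpu_memcpy(rpc, (u8 *)rpc_buff, size_rpc);
		nvgpu_kfree(g, rpc_payload);
	}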

Bug 3789998

Change-Id: I25104828d836ae37e127b40c88209da81754ffb8
Signed-off-by: Divya <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2839968
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
commit 04cd344b35 (parent 4ed84db8a5)
Author:    Divya <dsinghatwari@nvidia.com>
Date:      2023-01-09 12:32:39 +00:00
Committer: mobile promotions

3 changed files with 40 additions and 13 deletions

File 1 of 3:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -138,7 +138,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu,
             * "GPU has disappeared from bus".
             */
            *(volatile u8 *)var = val;
-           return 0;
+           return PMU_FW_ACK_STATE_OFF;
        }

        if (nvgpu_can_busy(g) == 0) {
@@ -151,7 +151,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu,
             */
            *(volatile u8 *)var = val;
-           return 0;
+           return PMU_FW_ACK_DRIVER_SHUTDOWN;
        }

        if (g->ops.pmu.pmu_is_interrupted(pmu)) {
@@ -163,7 +163,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu,
        /* Confirm ACK from PMU before timeout check */
        if (*(volatile u8 *)var == val) {
-           return 0;
+           return PMU_FW_ACK_RECEIVED;
        }
    } while (nvgpu_timeout_expired(&timeout) == 0);

File 2 of 3:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -646,9 +646,11 @@ int nvgpu_pmu_rpc_execute(struct nvgpu_pmu *pmu, u8 *rpc,
    struct pmu_payload payload;
    struct rpc_handler_payload *rpc_payload = NULL;
    struct nv_pmu_rpc_header *rpc_header = NULL;
+   struct pmu_sequence *seq = NULL;
    pmu_callback callback = NULL;
    void *rpc_buff = NULL;
    int status = 0;
+   int fw_ack_status = 0;

    if (nvgpu_can_busy(g) == 0) {
        return 0;
@@ -725,17 +727,26 @@
     */
    if (is_copy_back) {
        /* wait till RPC execute in PMU & ACK */
-       if (nvgpu_pmu_wait_fw_ack_status(g, pmu,
+       fw_ack_status = nvgpu_pmu_wait_fw_ack_status(g, pmu,
            nvgpu_get_poll_timeout(g),
-           &rpc_payload->complete, 1U) != 0) {
+           &rpc_payload->complete, 1U);
+       if (fw_ack_status == -ETIMEDOUT) {
            nvgpu_err(g, "PMU wait timeout expired.");
            status = -ETIMEDOUT;
            goto cleanup;
+       } else if (fw_ack_status == PMU_FW_ACK_DRIVER_SHUTDOWN) {
+           /* free allocated memory */
+           nvgpu_kfree(g, rpc_payload);
+           /* release the sequence */
+           seq = nvgpu_pmu_sequences_get_seq(pmu->sequences,
+                   cmd.hdr.seq_id);
+           nvgpu_pmu_seq_release(g, pmu->sequences, seq);
+       } else {
+           /* copy back data to caller */
+           nvgpu_memcpy(rpc, (u8 *)rpc_buff, size_rpc);
+           /* free allocated memory */
+           nvgpu_kfree(g, rpc_payload);
        }
-
-       /* copy back data to caller */
-       nvgpu_memcpy(rpc, (u8 *)rpc_buff, size_rpc);
-       /* free allocated memory */
-       nvgpu_kfree(g, rpc_payload);
    }

    return 0;

File 3 of 3:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -50,6 +50,22 @@ struct boardobjgrpmask;
 #define PMU_FW_STATE_STARTED 7U /* Fully unitialized */
 #define PMU_FW_STATE_EXIT 8U /* Exit PMU state machine */

+/* ACK status for RPCs from PMU firmware */
+/* RPC ACK is recieved from PMU FW */
+#define PMU_FW_ACK_RECEIVED 0
+/*
+ * PMU state is OFF so RPC ACK will not be received.
+ * Set the ACK status as State OFF
+ */
+#define PMU_FW_ACK_STATE_OFF 1
+/*
+ * Driver is shutting down, so we don't wait
+ * for ACK from PMU. Set the ACK status to
+ * Driver Shutdown
+ */
+#define PMU_FW_ACK_DRIVER_SHUTDOWN 2
+
 struct pmu_fw_ver_ops {
    u32 (*get_cmd_line_args_size)(struct nvgpu_pmu *pmu);
    void (*set_cmd_line_args_cpu_freq)(struct nvgpu_pmu *pmu,