From 04cd344b355f403b00cd0d68568bcefa533a72d3 Mon Sep 17 00:00:00 2001 From: Divya Date: Mon, 9 Jan 2023 12:32:39 +0000 Subject: [PATCH] gpu: nvgpu: free rpc_payload when driver is dying - During nvgpu module unload, the poweroff sequence will not wait for the ACK from the PMU for the RPCs sent. - Due to this, rpc_payload struct info will be present in pmu seq struct. - This can lead to memory corruption during unload path. - To avoid this, return a different value for driver shutting down scenario from fw ack function and based on this return value free the RPC payload and release the respective pmu sequence struct. Bug 3789998 Change-Id: I25104828d836ae37e127b40c88209da81754ffb8 Signed-off-by: Divya Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2839968 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Mahantesh Kumbar GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/pmu/fw/fw.c | 8 +++---- drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c | 27 +++++++++++++++------- drivers/gpu/nvgpu/include/nvgpu/pmu/fw.h | 18 ++++++++++++++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw.c b/drivers/gpu/nvgpu/common/pmu/fw/fw.c index c7ef11875..287037b55 100644 --- a/drivers/gpu/nvgpu/common/pmu/fw/fw.c +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -138,7 +138,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu, * "GPU has disappeared from bus". 
*/ *(volatile u8 *)var = val; - return 0; + return PMU_FW_ACK_STATE_OFF; } if (nvgpu_can_busy(g) == 0) { @@ -151,7 +151,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu, */ *(volatile u8 *)var = val; - return 0; + return PMU_FW_ACK_DRIVER_SHUTDOWN; } if (g->ops.pmu.pmu_is_interrupted(pmu)) { @@ -163,7 +163,7 @@ int nvgpu_pmu_wait_fw_ack_status(struct gk20a *g, struct nvgpu_pmu *pmu, /* Confirm ACK from PMU before timeout check */ if (*(volatile u8 *)var == val) { - return 0; + return PMU_FW_ACK_RECEIVED; } } while (nvgpu_timeout_expired(&timeout) == 0); diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c index 7073c66dd..dd8929ede 100644 --- a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -646,9 +646,11 @@ int nvgpu_pmu_rpc_execute(struct nvgpu_pmu *pmu, u8 *rpc, struct pmu_payload payload; struct rpc_handler_payload *rpc_payload = NULL; struct nv_pmu_rpc_header *rpc_header = NULL; + struct pmu_sequence *seq = NULL; pmu_callback callback = NULL; void *rpc_buff = NULL; int status = 0; + int fw_ack_status = 0; if (nvgpu_can_busy(g) == 0) { return 0; @@ -725,17 +727,26 @@ int nvgpu_pmu_rpc_execute(struct nvgpu_pmu *pmu, u8 *rpc, */ if (is_copy_back) { /* wait till RPC execute in PMU & ACK */ - if (nvgpu_pmu_wait_fw_ack_status(g, pmu, - nvgpu_get_poll_timeout(g), - &rpc_payload->complete, 1U) != 0) { + fw_ack_status = nvgpu_pmu_wait_fw_ack_status(g, pmu, + nvgpu_get_poll_timeout(g), + &rpc_payload->complete, 1U); + if (fw_ack_status == -ETIMEDOUT) { nvgpu_err(g, "PMU wait timeout expired."); status = -ETIMEDOUT; goto cleanup; + } else if 
(fw_ack_status == PMU_FW_ACK_DRIVER_SHUTDOWN) { /* free allocated memory */ nvgpu_kfree(g, rpc_payload); /* release the sequence */ seq = nvgpu_pmu_sequences_get_seq(pmu->sequences, cmd.hdr.seq_id); nvgpu_pmu_seq_release(g, pmu->sequences, seq); } else { /* copy back data to caller */ nvgpu_memcpy(rpc, (u8 *)rpc_buff, size_rpc); /* free allocated memory */ nvgpu_kfree(g, rpc_payload); } - /* copy back data to caller */ - nvgpu_memcpy(rpc, (u8 *)rpc_buff, size_rpc); - /* free allocated memory */ - nvgpu_kfree(g, rpc_payload); } return 0; diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/fw.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/fw.h index e502445c5..aead2a27e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu/fw.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -50,6 +50,22 @@ struct boardobjgrpmask; #define PMU_FW_STATE_STARTED 7U /* Fully unitialized */ #define PMU_FW_STATE_EXIT 8U /* Exit PMU state machine */ +/* ACK status for RPCs from PMU firmware */ + +/* RPC ACK is received from PMU FW */ +#define PMU_FW_ACK_RECEIVED 0 +/* + * PMU state is OFF so RPC ACK will not be received. + * Set the ACK status as State OFF + */ +#define PMU_FW_ACK_STATE_OFF 1 +/* + * Driver is shutting down, so we don't wait + * for ACK from PMU. Set the ACK status to + * Driver Shutdown + */ +#define PMU_FW_ACK_DRIVER_SHUTDOWN 2 + struct pmu_fw_ver_ops { u32 (*get_cmd_line_args_size)(struct nvgpu_pmu *pmu); void (*set_cmd_line_args_cpu_freq)(struct nvgpu_pmu *pmu,