From dc366ea4b4b815906d18d9dd8f32438ef84b41ba Mon Sep 17 00:00:00 2001 From: Divya Singhatwaria Date: Fri, 23 Aug 2019 15:45:48 +0530 Subject: [PATCH] gpu: nvgpu: Fix PMU destroy sequence A call to exit the PMU state machine/kthread must be prioritized over any other state change. It was possible to set the state as PMU_STATE_EXIT, signal the kthread and overwrite the state before the kthread had a chance to exit its loop. This may lead to a "lost" signal, resulting in an indefinite wait during the destroy sequence. Faulting sequence: 1. pmu_state = PMU_STATE_EXIT in nvgpu_pmu_destroy() 2. cond_signal() 3. pmu_state = PMU_STATE_LOADING_PG_BUF 4. PMU kthread wakes up 5. PMU kthread processes PMU_STATE_LOADING_PG_BUF 6. PMU kthread sleeps 7. nvgpu_pmu_destroy() waits indefinitely This patch adds a sticky flag to indicate PMU_STATE_EXIT, irrespective of any subsequent changes to pmu_state. The PMU PG init kthread may wait on a call to NVGPU_COND_WAIT_INTERRUPTIBLE, which requires a corresponding call to nvgpu_cond_signal_interruptible() because the core kernel code requires this task mask to wake up an interruptible task. 
Bug 2658750 Bug 200532122 Change-Id: I61beae80673486f83bf60c703a8af88b066a1c36 Signed-off-by: Divya Singhatwaria Reviewed-on: https://git-master.nvidia.com/r/2181926 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra Reviewed-by: Mahantesh Kumbar GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/pmu/fw/fw.c | 4 ++++ drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c | 4 +++- drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_pg.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw.c b/drivers/gpu/nvgpu/common/pmu/fw/fw.c index ca0b25449..cbc78d229 100644 --- a/drivers/gpu/nvgpu/common/pmu/fw/fw.c +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw.c @@ -65,6 +65,10 @@ void nvgpu_pmu_fw_state_change(struct gk20a *g, struct nvgpu_pmu *pmu, nvgpu_smp_wmb(); pmu->fw->state = pmu_state; + /* Set a sticky flag to indicate PMU state exit */ + if (pmu_state == PMU_FW_STATE_EXIT) { + pmu->pg->pg_init.state_destroy = true; + } if (post_change_event) { if (g->can_elpg) { pmu->pg->pg_init.state_change = true; diff --git a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c index eed137dd0..c343a3239 100644 --- a/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c +++ b/drivers/gpu/nvgpu/common/pmu/pg/pmu_pg.c @@ -798,6 +798,8 @@ static void pmu_pg_kill_task(struct gk20a *g, struct nvgpu_pmu *pmu, nvgpu_udelay(2); } while (nvgpu_timeout_expired_msg(&timeout, "timeout - waiting PMU state machine thread stop") == 0); + /* Reset the flag for next time */ + pmu->pg->pg_init.state_destroy = false; } else { nvgpu_thread_join(&pg->pg_init.state_task); } @@ -821,7 +823,7 @@ static int pmu_pg_task(void *arg) pmu->pg->pg_init.state_change = false; pmu_state = nvgpu_pmu_get_fw_state(g, pmu); - if (pmu_state == PMU_FW_STATE_EXIT) { + if (pmu->pg->pg_init.state_destroy) { nvgpu_pmu_dbg(g, "pmu state exit"); break; } diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_pg.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_pg.h index a2cf29728..0661a24da 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_pg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_pg.h @@ -61,6 +61,7 @@ struct pmu_pg_stats_data; struct nvgpu_pg_init { bool state_change; + bool state_destroy; struct nvgpu_cond wq; struct nvgpu_thread state_task; };