From 21ab579341e7271daa0c9bdcbb060614306dc535 Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Mon, 13 Jun 2022 12:18:42 +0530 Subject: [PATCH] gpu: nvgpu: don't skip setting same clk in arbiter In the current setting, the clock arbiter skips setting the clock if it's already set previously. The value set by the arbiter is stored in "struct nvgpu_clk_arb->actual" whenever the clock is updated via the arbiter. However, DVFS might also update the clock and the updates are not synchronized with the arbiter. Hence, ensure that any clock requests are always updated, i.e. the requested rate is set even if the previous rate remains the same. In the devfreq scale() part, scale emc when clk_arb is active and skip setting of clocks. Bug 3666615 Signed-off-by: Debarshi Dutta Change-Id: I32bf4dbf81b19fdd6fa0bdec3a6c9a9312b78eca Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2727787 Reviewed-by: svcacv Reviewed-by: Vijayakumar Subbu GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c | 16 +++++++++++++++- drivers/gpu/nvgpu/os/linux/driver_common.c | 3 +++ drivers/gpu/nvgpu/os/linux/scale.c | 9 ++++++--- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c index fe844a251..54e43fd32 100644 --- a/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -305,11 +305,23 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) gpc2clk_session_target = gpc2clk_target; +/* When DVFS is enabled, there is a mismatch between + * arb->actual->gpc2clk as dvfs doesn't synchronize + * values w.r.t Arbiter. Hence, allow the clocks + * to be set irrespective of whether it exists. + * + * CONFIG_GK20A_DEVFREQ is defined only for Linux + * and DVFS is supported only for linux. For, other + * platforms, arb->actual->gpc2clk contains the correct + * value. + */ +#ifndef CONFIG_GK20A_DEVFREQ if (arb->actual->gpc2clk == gpc2clk_target) { nvgpu_atomic_inc(&arb->req_nr); nvgpu_cond_signal_interruptible(&arb->request_wq); goto exit_arb; } +#endif nvgpu_mutex_acquire(&arb->pstate_lock); @@ -344,6 +356,8 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb) goto exit_arb; } + g->last_freq = rounded_rate; + actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ? 
&arb->actual_pool[1] : &arb->actual_pool[0]); diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index c6de85e45..b55cbaa72 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -100,6 +100,9 @@ static void nvgpu_init_vars(struct gk20a *g) /* Init the clock req count to 0 */ nvgpu_atomic_set(&g->clk_arb_global_nr, 0); + /* Atomic set doesn't guarantee a barrier */ + nvgpu_smp_wmb(); + nvgpu_mutex_init(&l->ctrl_privs_lock); nvgpu_init_list_node(&l->ctrl_privs); diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index 3ab9c9331..7e669120a 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -163,11 +163,13 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq, struct devfreq *devfreq = l->devfreq; #endif unsigned long local_freq = *freq; - unsigned long rounded_rate; + unsigned long rounded_rate = 0; unsigned long min_freq = 0, max_freq = 0; - if (nvgpu_clk_arb_has_active_req(g)) - return 0; + if (nvgpu_clk_arb_has_active_req(g)) { + rounded_rate = g->last_freq; + goto post_scale; + } /* * Calculate floor and cap frequency values * @@ -222,6 +224,7 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq, g->last_freq = *freq; +post_scale: /* postscale will only scale emc (dram clock) if evaluating * gk20a_tegra_get_emc_rate() produces a new or different emc * target because the load or_and gpufreq has changed */