gpu: nvgpu: don't skip setting same clk in arbiter

In the current setting, the clock arbiter skips setting
the clock if it has already been set previously. The value
set by the arbiter is stored in
"struct nvgpu_clk_arb->actual" whenever the clock is
updated via the arbiter. However, DVFS might also
update the clock and the updates are not synchronized
with the arbiter. Hence, ensure that every clock
request is always applied, i.e. the requested rate is
set even if it is the same as the previous rate.

In the devfreq scale() path, when clk_arb is active,
skip setting the clocks but still scale emc.

Bug 3666615

Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Change-Id: I32bf4dbf81b19fdd6fa0bdec3a6c9a9312b78eca
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2727787
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Debarshi Dutta
2022-06-13 12:18:42 +05:30
committed by mobile promotions
parent 4194c35e17
commit 21ab579341
3 changed files with 24 additions and 4 deletions

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -305,11 +305,23 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
gpc2clk_session_target = gpc2clk_target; gpc2clk_session_target = gpc2clk_target;
/* When DVFS is enabled, there is a mismatch between
* arb->actual->gpc2clk as dvfs doesn't synchronize
* values w.r.t Arbiter. Hence, allow the clocks
* to be set irrespective of whether it exists.
*
* CONFIG_GK20A_DEVFREQ is defined only for Linux
* and DVFS is supported only for linux. For, other
* platforms, arb->actual->gpc2clk contains the correct
* value.
*/
#ifndef CONFIG_GK20A_DEVFREQ
if (arb->actual->gpc2clk == gpc2clk_target) { if (arb->actual->gpc2clk == gpc2clk_target) {
nvgpu_atomic_inc(&arb->req_nr); nvgpu_atomic_inc(&arb->req_nr);
nvgpu_cond_signal_interruptible(&arb->request_wq); nvgpu_cond_signal_interruptible(&arb->request_wq);
goto exit_arb; goto exit_arb;
} }
#endif
nvgpu_mutex_acquire(&arb->pstate_lock); nvgpu_mutex_acquire(&arb->pstate_lock);
@@ -344,6 +356,8 @@ void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
goto exit_arb; goto exit_arb;
} }
g->last_freq = rounded_rate;
actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ? actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ?
&arb->actual_pool[1] : &arb->actual_pool[0]); &arb->actual_pool[1] : &arb->actual_pool[0]);

View File

@@ -100,6 +100,9 @@ static void nvgpu_init_vars(struct gk20a *g)
/* Init the clock req count to 0 */ /* Init the clock req count to 0 */
nvgpu_atomic_set(&g->clk_arb_global_nr, 0); nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
/* Atomic set doesn't guarantee a barrier */
nvgpu_smp_wmb();
nvgpu_mutex_init(&l->ctrl_privs_lock); nvgpu_mutex_init(&l->ctrl_privs_lock);
nvgpu_init_list_node(&l->ctrl_privs); nvgpu_init_list_node(&l->ctrl_privs);

View File

@@ -163,11 +163,13 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
struct devfreq *devfreq = l->devfreq; struct devfreq *devfreq = l->devfreq;
#endif #endif
unsigned long local_freq = *freq; unsigned long local_freq = *freq;
unsigned long rounded_rate; unsigned long rounded_rate = 0;
unsigned long min_freq = 0, max_freq = 0; unsigned long min_freq = 0, max_freq = 0;
if (nvgpu_clk_arb_has_active_req(g)) if (nvgpu_clk_arb_has_active_req(g)) {
return 0; rounded_rate = g->last_freq;
goto post_scale;
}
/* /*
* Calculate floor and cap frequency values * Calculate floor and cap frequency values
* *
@@ -222,6 +224,7 @@ static int gk20a_scale_target(struct device *dev, unsigned long *freq,
g->last_freq = *freq; g->last_freq = *freq;
post_scale:
/* postscale will only scale emc (dram clock) if evaluating /* postscale will only scale emc (dram clock) if evaluating
* gk20a_tegra_get_emc_rate() produces a new or different emc * gk20a_tegra_get_emc_rate() produces a new or different emc
* target because the load or_and gpufreq has changed */ * target because the load or_and gpufreq has changed */