gpu: nvgpu: ga10b+: emc scaling using ICC

For EMC frequency scaling, prior to ga10b, the nvgpu driver
used BWMGR. On ga10b+, BWMGR support is deprecated, so EMC
scaling now goes through the Linux interconnect (ICC) framework.

Jira NVGPU-7312
Bug 3514055
Bug 200766984

Change-Id: Ib1f87afe021414dfc563e007823f93098937fe59
Signed-off-by: Johnny Liu <johnliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2706374
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Johnny Liu
2022-05-03 13:10:00 +08:00
committed by mobile promotions
parent fac998940c
commit 69ec2dcff7
3 changed files with 119 additions and 45 deletions

View File

@@ -23,8 +23,10 @@
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/clk.h> #include <linux/clk.h>
#ifdef CONFIG_TEGRA_BWMGR #if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
#include <linux/platform/tegra/emc_bwmgr.h> #include <linux/platform/tegra/mc_utils.h>
#include <linux/interconnect.h>
#include <dt-bindings/interconnect/tegra_icc_id.h>
#endif #endif
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/fuse.h> #include <linux/fuse.h>
@@ -81,6 +83,35 @@ struct gk20a_platform_clk tegra_ga10b_clocks[] = {
#define NVGPU_GPC0_DISABLE BIT(0) #define NVGPU_GPC0_DISABLE BIT(0)
#define NVGPU_GPC1_DISABLE BIT(1) #define NVGPU_GPC1_DISABLE BIT(1)
#if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
/*
 * Multiply two unsigned long values, returning ULONG_MAX instead of a
 * wrapped result when the product does not fit.
 *
 * The check is done by division before multiplying: comparing the wrapped
 * product against one operand afterwards does not catch every overflow.
 */
static unsigned long ga10b_tegra_mul_saturate(unsigned long a, unsigned long b)
{
	if (a != 0UL && b > (ULONG_MAX / a))
		return ULONG_MAX;

	return a * b;
}

/*
 * Derive an EMC rate from a GPU rate and request the matching peak
 * bandwidth on the ICC path stored in profile->private_data.
 *
 * The EMC rate is gpu_rate * EMC_BW_RATIO * emc3d_ratio (each step
 * saturating at ULONG_MAX), divided by 1000; that value is divided by
 * 1000 once more before being handed to emc_freq_to_bw().
 * NOTE(review): the units of gpu_rate and emc3d_ratio are not visible
 * here (presumably Hz and a per-mille ratio) - confirm against callers.
 *
 * @profile:     scaling profile; may be NULL or lack an ICC path handle.
 * @gpu_rate:    GPU clock rate used as the base of the computation.
 * @emc3d_ratio: multiplier applied on top of EMC_BW_RATIO; 0 yields a
 *               zero bandwidth request.
 *
 * Returns the result of icc_set_bw(), or 0 when no ICC path is available.
 */
static int ga10b_tegra_set_emc_rate(struct gk20a_scale_profile *profile,
		unsigned long gpu_rate, unsigned long emc3d_ratio)
{
	unsigned long emc_rate;
	unsigned long peak_bw;
	u32 emc_freq_kbps;

	if (!profile || !profile->private_data) {
		/* EMC scaling profile is not available */
		return 0;
	}

	emc_rate = ga10b_tegra_mul_saturate(gpu_rate, EMC_BW_RATIO);
	emc_rate = ga10b_tegra_mul_saturate(emc_rate, emc3d_ratio);
	emc_rate /= 1000;

	/* peak bandwidth in kilobytes per second */
	peak_bw = emc_freq_to_bw(emc_rate / 1000);

	/* icc_set_bw() takes 32-bit bandwidth values; clamp rather than truncate */
	emc_freq_kbps = (peak_bw > UINT_MAX) ? UINT_MAX : peak_bw;

	/* average bandwidth request is 0; only the peak value is set */
	return icc_set_bw((struct icc_path *)profile->private_data,
			0, emc_freq_kbps);
}
#endif
static bool ga10b_tegra_is_clock_available(struct gk20a *g, char *clk_name) static bool ga10b_tegra_is_clock_available(struct gk20a *g, char *clk_name)
{ {
u32 gpc_disable = 0U; u32 gpc_disable = 0U;
@@ -190,28 +221,40 @@ static int ga10b_tegra_get_clocks(struct device *dev)
void ga10b_tegra_scale_init(struct device *dev) void ga10b_tegra_scale_init(struct device *dev)
{ {
#ifdef CONFIG_TEGRA_BWMG #if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile; struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct icc_path *icc_path_handle;
if (!profile) if (!profile)
return; return;
platform->g->emc3d_ratio = EMC3D_GA10B_RATIO; platform->g->emc3d_ratio = EMC3D_GA10B_RATIO;
gp10b_tegra_scale_init(dev); if ((struct icc_path *)profile->private_data)
return;
icc_path_handle = icc_get(dev, TEGRA_ICC_GPU, TEGRA_ICC_PRIMARY);
if (IS_ERR_OR_NULL(icc_path_handle)) {
dev_err(dev, "%s unable to get icc path (err=%ld)\n",
__func__, PTR_ERR(icc_path_handle));
return;
}
profile->private_data = (void *)icc_path_handle;
#endif #endif
} }
static void ga10b_tegra_scale_exit(struct device *dev) static void ga10b_tegra_scale_exit(struct device *dev)
{ {
#ifdef CONFIG_TEGRA_BWMGR #if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile; struct gk20a_scale_profile *profile = platform->g->scale_profile;
if (profile) if (profile && profile->private_data) {
tegra_bwmgr_unregister( icc_put((struct icc_path *)profile->private_data);
(struct tegra_bwmgr_client *)profile->private_data); profile->private_data = NULL;
}
#endif #endif
} }
@@ -320,17 +363,19 @@ static bool ga10b_tegra_is_railgated(struct device *dev)
static int ga10b_tegra_railgate(struct device *dev) static int ga10b_tegra_railgate(struct device *dev)
{ {
#ifdef CONFIG_TEGRA_BWMGR #if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile; struct gk20a_scale_profile *profile = platform->g->scale_profile;
int ret = 0;
/* remove emc frequency floor */ /* remove emc frequency floor */
if (profile) if (profile && profile->private_data) {
tegra_bwmgr_set_emc( ret = icc_set_bw((struct icc_path *)profile->private_data,
(struct tegra_bwmgr_client *)profile->private_data, 0, 0);
0, TEGRA_BWMGR_SET_EMC_FLOOR); if (ret)
#endif /* CONFIG_TEGRA_BWMGR */ dev_err(dev, "failed to set emc freq rate:%d\n", ret);
}
#endif
gp10b_tegra_clks_control(dev, false); gp10b_tegra_clks_control(dev, false);
return 0; return 0;
@@ -398,29 +443,30 @@ static int ga10b_tegra_bpmp_mrq_set(struct device *dev)
static int ga10b_tegra_unrailgate(struct device *dev) static int ga10b_tegra_unrailgate(struct device *dev)
{ {
int ret = 0; int ret = 0;
#if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
struct gk20a_platform *platform = gk20a_get_platform(dev); struct gk20a_platform *platform = gk20a_get_platform(dev);
#ifdef CONFIG_TEGRA_BWMGR
struct gk20a_scale_profile *profile = platform->g->scale_profile; struct gk20a_scale_profile *profile = platform->g->scale_profile;
unsigned long max_rate;
long rate;
#endif #endif
#if defined(CONFIG_TEGRA_BPMP) #if defined(CONFIG_TEGRA_BPMP)
ret = ga10b_tegra_bpmp_mrq_set(dev); ret = ga10b_tegra_bpmp_mrq_set(dev);
if (ret != 0) { if (ret != 0)
nvgpu_err(platform->g, "ga10b_tegra_bpmp_mrq_set failed");
return ret; return ret;
}
#endif #endif
/* Setting clk controls */ /* Setting clk controls */
gp10b_tegra_clks_control(dev, true); gp10b_tegra_clks_control(dev, true);
#ifdef CONFIG_TEGRA_BWMGR #if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
/* to start with set emc frequency floor to max rate*/ /* to start with set emc frequency floor for max gpu sys rate*/
if (profile) rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));
tegra_bwmgr_set_emc( max_rate = (rate < 0) ? ULONG_MAX : (unsigned long)rate;
(struct tegra_bwmgr_client *)profile->private_data, ret = ga10b_tegra_set_emc_rate(profile,
tegra_bwmgr_get_max_emc_rate(), max_rate, platform->g->emc3d_ratio);
TEGRA_BWMGR_SET_EMC_FLOOR); if (ret)
dev_err(dev, "failed to set emc freq rate:%d\n", ret);
#endif #endif
return ret; return ret;
} }
@@ -486,6 +532,34 @@ static int ga10b_tegra_set_fbp_pg_mask(struct device *dev, u32 dt_fbp_pg_mask)
return -EINVAL; return -EINVAL;
} }
/*
 * Post-scale hook: after the GPU frequency changed to @freq, update the
 * EMC bandwidth request to match.
 *
 * Nothing is requested when there is no scaling profile, the profile has
 * no ICC path handle, or the platform reports the GPU as railgated.
 * At or below the first entry of gp10b_freq_table the EMC scale factor is
 * forced to 0; otherwise the GPU's emc3d_ratio is used.
 *
 * Compiled to an empty function unless both CONFIG_INTERCONNECT and
 * CONFIG_TEGRA_T23X_GRHOST are defined.
 */
void ga10b_tegra_postscale(struct device *pdev,
					unsigned long freq)
{
#if defined(CONFIG_INTERCONNECT) && defined(CONFIG_TEGRA_T23X_GRHOST)
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a *g = get_gk20a(pdev);
	unsigned long emc_scale;
	int err = 0;

	nvgpu_log_fn(g, " ");

	/* Skip the bandwidth update when scaling is unavailable or GPU is gated */
	if (!profile || !profile->private_data ||
			platform->is_railgated(pdev))
		goto done;

	emc_scale = (freq <= gp10b_freq_table[0]) ? 0UL : g->emc3d_ratio;

	err = ga10b_tegra_set_emc_rate(profile, freq, emc_scale);
	if (err)
		dev_err(pdev, "failed to set emc freq rate:%d\n", err);

done:
	nvgpu_log_fn(g, "done");
#endif
}
static void ga10b_tegra_set_valid_tpc_pg_mask(struct gk20a_platform *platform) static void ga10b_tegra_set_valid_tpc_pg_mask(struct gk20a_platform *platform)
{ {
u32 i; u32 i;
@@ -674,7 +748,7 @@ struct gk20a_platform ga10b_tegra_platform = {
/* frequency scaling configuration */ /* frequency scaling configuration */
.initscale = ga10b_tegra_scale_init, .initscale = ga10b_tegra_scale_init,
.prescale = gp10b_tegra_prescale, .prescale = gp10b_tegra_prescale,
.postscale = gp10b_tegra_postscale, .postscale = ga10b_tegra_postscale,
.devfreq_governor = "nvhost_podgov", .devfreq_governor = "nvhost_podgov",
.qos_notify = gk20a_scale_qos_notify, .qos_notify = gk20a_scale_qos_notify,

View File

@@ -1,7 +1,7 @@
/* /*
* GP10B Platform (SoC) Interface * GP10B Platform (SoC) Interface
* *
* Copyright (c) 2014-2021, NVIDIA Corporation. All rights reserved. * Copyright (c) 2014-2022, NVIDIA Corporation. All rights reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
@@ -25,6 +25,22 @@ struct gk20a_platform_clk {
unsigned long default_rate; unsigned long default_rate;
}; };
/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
#define GP10B_FREQ_SELECT_STEP 8
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120
/* Allow limited set of frequencies to be available */
#define GP10B_NUM_SUPPORTED_FREQS ((GP10B_MAX_SUPPORTED_FREQS) / (GP10B_FREQ_SELECT_STEP))
/*
 * NOTE(review): presumably per-clock bandwidth factors for the GPU and
 * DDR4 sides; their units are not visible here - confirm.
 */
#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16
/* Factor the GPU rate is multiplied by when deriving an EMC rate (64 / 16) */
#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
/*
 * GPU frequency table with GP10B_NUM_SUPPORTED_FREQS entries, declared here
 * so that platform code outside the defining file can read it.
 */
extern unsigned long gp10b_freq_table[GP10B_NUM_SUPPORTED_FREQS];
void gp10b_tegra_clks_control(struct device *dev, bool enable); void gp10b_tegra_clks_control(struct device *dev, bool enable);
int gp10b_tegra_get_clocks(struct device *dev); int gp10b_tegra_get_clocks(struct device *dev);
int gp10b_tegra_reset_assert(struct device *dev); int gp10b_tegra_reset_assert(struct device *dev);

View File

@@ -54,26 +54,10 @@
#include "scale.h" #include "scale.h"
#include "module.h" #include "module.h"
/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */ unsigned long gp10b_freq_table[GP10B_NUM_SUPPORTED_FREQS];
#define GP10B_FREQ_SELECT_STEP 8
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120
/* Allow limited set of frequencies to be available */
#define GP10B_NUM_SUPPORTED_FREQS ((GP10B_MAX_SUPPORTED_FREQS) / (GP10B_FREQ_SELECT_STEP))
static unsigned long
gp10b_freq_table[GP10B_NUM_SUPPORTED_FREQS];
static bool freq_table_init_complete; static bool freq_table_init_complete;
static int num_supported_freq; static int num_supported_freq;
#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16
#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
#define GPCCLK_INIT_RATE 1000000000 #define GPCCLK_INIT_RATE 1000000000
struct gk20a_platform_clk tegra_gp10b_clocks[] = { struct gk20a_platform_clk tegra_gp10b_clocks[] = {