mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 18:42:29 +03:00
gpu: nvgpu: 3d.emc bandwidth ratio policy
Modify the 3d.emc policy to use a formula based on bandwidth and utilization instead of the current sku-dependent policy.

Bug 1364894

Change-Id: Id97f765a48f0aa9f5ebeb0c82bccb22db474a1ae
Signed-off-by: Samuel Russell <samuelr@nvidia.com>
Reviewed-on: http://git-master/r/453586
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
04efcaf97e
commit
08dc7c3584
@@ -71,6 +71,8 @@
|
||||
|
||||
#define GK20A_NUM_CDEVS 6
|
||||
|
||||
#define EMC3D_DEFAULT_RATIO 750
|
||||
|
||||
#if defined(GK20A_DEBUG)
|
||||
u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
|
||||
u32 gk20a_dbg_ftrace;
|
||||
@@ -1462,6 +1464,8 @@ static int gk20a_probe(struct platform_device *dev)
|
||||
return err;
|
||||
}
|
||||
|
||||
gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO;
|
||||
|
||||
/* Initialise scaling */
|
||||
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
||||
gk20a_scale_init(dev);
|
||||
|
||||
@@ -297,6 +297,8 @@ struct gk20a {
|
||||
bool forced_reset;
|
||||
bool allow_all;
|
||||
|
||||
u32 emc3d_ratio;
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
spinlock_t debugfs_lock;
|
||||
struct dentry *debugfs_ltc_enabled;
|
||||
|
||||
@@ -469,6 +469,32 @@ static ssize_t allow_all_enable_store(struct device *device,
|
||||
static DEVICE_ATTR(allow_all, ROOTRW,
|
||||
allow_all_enable_read, allow_all_enable_store);
|
||||
|
||||
/*
 * emc3d_ratio_store() - sysfs store handler for the emc3d_ratio attribute.
 * @device: device the attribute belongs to
 * @attr:   attribute being written (unused)
 * @buf:    user-supplied text, parsed as an unsigned base-10 integer
 * @count:  number of bytes in @buf
 *
 * Saves the parsed value in g->emc3d_ratio.  That field is a u32, so the
 * input is parsed directly as a u32: a value that does not fit is rejected
 * instead of being silently truncated on builds where unsigned long is
 * wider than 32 bits.
 *
 * Returns @count on success, -EINVAL if @buf is not a valid u32.
 */
static ssize_t emc3d_ratio_store(struct device *device,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct platform_device *ndev = to_platform_device(device);
	struct gk20a *g = get_gk20a(ndev);
	u32 val = 0;

	/* Keep reporting every parse failure as -EINVAL, as before. */
	if (kstrtou32(buf, 10, &val) < 0)
		return -EINVAL;

	g->emc3d_ratio = val;

	return count;
}
|
||||
|
||||
static ssize_t emc3d_ratio_read(struct device *device,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct platform_device *ndev = to_platform_device(device);
|
||||
struct gk20a *g = get_gk20a(ndev);
|
||||
|
||||
return sprintf(buf, "%d\n", g->emc3d_ratio);
|
||||
}
|
||||
|
||||
/*
 * sysfs attribute "emc3d_ratio": reads are served by emc3d_ratio_read(),
 * writes by emc3d_ratio_store().
 */
static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
|
||||
|
||||
#ifdef CONFIG_PM_RUNTIME
|
||||
static ssize_t force_idle_store(struct device *device,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
@@ -566,6 +592,7 @@ void gk20a_remove_sysfs(struct device *dev)
|
||||
device_remove_file(dev, &dev_attr_slcg_enable);
|
||||
device_remove_file(dev, &dev_attr_ptimer_scale_factor);
|
||||
device_remove_file(dev, &dev_attr_elpg_enable);
|
||||
device_remove_file(dev, &dev_attr_emc3d_ratio);
|
||||
device_remove_file(dev, &dev_attr_counters);
|
||||
device_remove_file(dev, &dev_attr_counters_reset);
|
||||
device_remove_file(dev, &dev_attr_load);
|
||||
@@ -593,6 +620,7 @@ void gk20a_create_sysfs(struct platform_device *dev)
|
||||
error |= device_create_file(&dev->dev, &dev_attr_slcg_enable);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_elpg_enable);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_emc3d_ratio);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_counters);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_counters_reset);
|
||||
error |= device_create_file(&dev->dev, &dev_attr_load);
|
||||
|
||||
@@ -39,16 +39,15 @@
|
||||
#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
|
||||
#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
|
||||
|
||||
#define TEGRA_GK20A_BW_PER_FREQ 32
|
||||
#define TEGRA_GM20B_BW_PER_FREQ 64
|
||||
#define TEGRA_DDR3_BW_PER_FREQ 16
|
||||
|
||||
extern struct device tegra_vpr_dev;
|
||||
struct gk20a_platform t132_gk20a_tegra_platform;
|
||||
|
||||
struct gk20a_emc_params {
|
||||
long emc_slope;
|
||||
long emc_offset;
|
||||
long emc_dip_slope;
|
||||
long emc_dip_offset;
|
||||
long emc_xmid;
|
||||
bool linear;
|
||||
long bw_ratio;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -189,20 +188,17 @@ fail:
|
||||
* This function returns the minimum emc clock based on gpu frequency
|
||||
*/
|
||||
|
||||
long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
|
||||
long gk20a_tegra_get_emc_rate(struct gk20a *g,
|
||||
struct gk20a_emc_params *emc_params, long freq)
|
||||
{
|
||||
long hz;
|
||||
|
||||
freq = INT_TO_FX(HZ_TO_MHZ(freq));
|
||||
hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
|
||||
freq = HZ_TO_MHZ(freq);
|
||||
|
||||
hz -= FXMUL(emc_params->emc_dip_slope,
|
||||
FXMUL(freq - emc_params->emc_xmid,
|
||||
freq - emc_params->emc_xmid)) +
|
||||
emc_params->emc_dip_offset;
|
||||
hz = (freq * emc_params->bw_ratio);
|
||||
hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;
|
||||
|
||||
hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
|
||||
hz = (hz < 0) ? 0 : hz;
|
||||
hz = MHZ_TO_HZ(hz);
|
||||
|
||||
return hz;
|
||||
}
|
||||
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
|
||||
struct gk20a *g = get_gk20a(pdev);
|
||||
|
||||
long after = gk20a_clk_get_rate(g);
|
||||
long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
|
||||
long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
|
||||
|
||||
clk_set_rate(platform->clk[2], emc_target);
|
||||
}
|
||||
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev)
|
||||
/*
|
||||
* gk20a_tegra_calibrate_emc()
|
||||
*
|
||||
* Compute emc scaling parameters
|
||||
*
|
||||
* Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
|
||||
*
|
||||
* Remc - 3d.emc rate
|
||||
* R3d - 3d.cbus rate
|
||||
* Rm - 3d.cbus 'middle' rate = (max + min)/2
|
||||
* S - emc_slope
|
||||
* O - emc_offset
|
||||
* Sd - emc_dip_slope
|
||||
* Od - emc_dip_offset
|
||||
*
|
||||
* this superposes a quadratic dip centered around the middle 3d
|
||||
* frequency over a linear correlation of 3d.emc to 3d clock
|
||||
* rates.
|
||||
*
|
||||
* S, O are chosen so that the maximum 3d rate produces the
|
||||
* maximum 3d.emc rate exactly, and the minimum 3d rate produces
|
||||
* at least the minimum 3d.emc rate.
|
||||
*
|
||||
* Sd and Od are chosen to produce the largest dip that will
|
||||
* keep 3d.emc frequencies monotonically decreasing with 3d
|
||||
* frequencies. To achieve this, the first derivative of Remc
|
||||
* with respect to R3d should be zero for the minimal 3d rate:
|
||||
*
|
||||
* R'emc = S - 2 * Sd * (R3d - Rm)
|
||||
* R'emc(R3d-min) = 0
|
||||
* S = 2 * Sd * (R3d-min - Rm)
|
||||
* = 2 * Sd * (R3d-min - R3d-max) / 2
|
||||
*
|
||||
* +------------------------------+
|
||||
* | Sd = S / (R3d-min - R3d-max) |
|
||||
* +------------------------------+
|
||||
*
|
||||
* dip = Sd * (R3d - Rm)^2 + Od
|
||||
*
|
||||
* requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
|
||||
*
|
||||
* Sd * (R3d-min - Rm)^2 + Od = 0
|
||||
* Od = -Sd * ((R3d-min - R3d-max) / 2)^2
|
||||
* = -Sd * ((R3d-min - R3d-max)^2) / 4
|
||||
*
|
||||
* +------------------------------+
|
||||
* | Od = (emc-max - emc-min) / 4 |
|
||||
* +------------------------------+
|
||||
*
|
||||
*/
|
||||
|
||||
void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
|
||||
struct clk *clk_3d, struct clk *clk_3d_emc)
|
||||
void gk20a_tegra_calibrate_emc(struct platform_device *pdev,
|
||||
struct gk20a_emc_params *emc_params)
|
||||
{
|
||||
long correction;
|
||||
unsigned long max_emc;
|
||||
unsigned long min_emc;
|
||||
unsigned long min_rate_3d;
|
||||
unsigned long max_rate_3d;
|
||||
struct gk20a *g = get_gk20a(pdev);
|
||||
long gpu_bw, emc_bw;
|
||||
|
||||
max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
|
||||
max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
|
||||
/* Detect and store gpu bw */
|
||||
u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
|
||||
switch (ver) {
|
||||
case GK20A_GPUID_GK20A:
|
||||
gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
|
||||
break;
|
||||
case GK20A_GPUID_GM20B:
|
||||
gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
|
||||
break;
|
||||
default:
|
||||
gpu_bw = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
min_emc = clk_round_rate(clk_3d_emc, 0);
|
||||
min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
|
||||
/* TODO detect DDR3 vs DDR4 */
|
||||
emc_bw = TEGRA_DDR3_BW_PER_FREQ;
|
||||
|
||||
max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
|
||||
max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
|
||||
|
||||
min_rate_3d = clk_round_rate(clk_3d, 0);
|
||||
min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
|
||||
|
||||
emc_params->emc_slope =
|
||||
FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
|
||||
emc_params->emc_offset = max_emc -
|
||||
FXMUL(emc_params->emc_slope, max_rate_3d);
|
||||
/* Guarantee max 3d rate maps to max emc rate */
|
||||
emc_params->emc_offset += max_emc -
|
||||
(FXMUL(emc_params->emc_slope, max_rate_3d) +
|
||||
emc_params->emc_offset);
|
||||
|
||||
emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
|
||||
emc_params->emc_dip_slope =
|
||||
-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
|
||||
emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
|
||||
correction =
|
||||
emc_params->emc_dip_offset +
|
||||
FXMUL(emc_params->emc_dip_slope,
|
||||
FXMUL(max_rate_3d - emc_params->emc_xmid,
|
||||
max_rate_3d - emc_params->emc_xmid));
|
||||
emc_params->emc_dip_offset -= correction;
|
||||
/* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
|
||||
* NOTE the ratio must come out as an integer */
|
||||
emc_params->bw_ratio = (gpu_bw / emc_bw);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
|
||||
{
|
||||
struct gk20a_platform *platform = gk20a_get_platform(pdev);
|
||||
struct gk20a_scale_profile *profile = platform->g->scale_profile;
|
||||
struct gk20a_emc_params *emc_params;
|
||||
struct gk20a_emc_params *emc_params;
|
||||
|
||||
if (!profile)
|
||||
return;
|
||||
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
|
||||
if (!emc_params)
|
||||
return;
|
||||
|
||||
gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
|
||||
platform->clk[2]);
|
||||
gk20a_tegra_calibrate_emc(pdev, emc_params);
|
||||
|
||||
profile->private_data = emc_params;
|
||||
}
|
||||
|
||||
@@ -3688,6 +3688,7 @@ int gk20a_pmu_load_update(struct gk20a *g)
|
||||
|
||||
pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
|
||||
pmu->load_shadow = _load / 10;
|
||||
pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1080,6 +1080,7 @@ struct pmu_gk20a {
|
||||
|
||||
u32 sample_buffer;
|
||||
u32 load_shadow;
|
||||
u32 load_avg;
|
||||
|
||||
struct mutex isr_mutex;
|
||||
bool isr_enabled;
|
||||
|
||||
Reference in New Issue
Block a user