gpu: nvgpu: 3d.emc bandwidth ratio policy

Modify the 3d.emc policy to use a formula based on bandwidth and
utilization instead of the current sku-dependent policy.

Bug 1364894

Change-Id: Id97f765a48f0aa9f5ebeb0c82bccb22db474a1ae
Signed-off-by: Samuel Russell <samuelr@nvidia.com>
Reviewed-on: http://git-master/r/453586
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Samuel Russell
2014-07-22 10:55:54 -07:00
committed by Dan Willemsen
parent 04efcaf97e
commit 08dc7c3584
6 changed files with 72 additions and 101 deletions

View File

@@ -71,6 +71,8 @@
#define GK20A_NUM_CDEVS 6
#define EMC3D_DEFAULT_RATIO 750
#if defined(GK20A_DEBUG)
u32 gk20a_dbg_mask = GK20A_DEFAULT_DBG_MASK;
u32 gk20a_dbg_ftrace;
@@ -1462,6 +1464,8 @@ static int gk20a_probe(struct platform_device *dev)
return err;
}
gk20a->emc3d_ratio = EMC3D_DEFAULT_RATIO;
/* Initialise scaling */
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
gk20a_scale_init(dev);

View File

@@ -297,6 +297,8 @@ struct gk20a {
bool forced_reset;
bool allow_all;
u32 emc3d_ratio;
#ifdef CONFIG_DEBUG_FS
spinlock_t debugfs_lock;
struct dentry *debugfs_ltc_enabled;

View File

@@ -469,6 +469,32 @@ static ssize_t allow_all_enable_store(struct device *device,
static DEVICE_ATTR(allow_all, ROOTRW,
allow_all_enable_read, allow_all_enable_store);
/*
 * emc3d_ratio_store() - sysfs write handler for the emc3d_ratio attribute.
 *
 * Parses @buf as an unsigned base-10 integer and stores it in
 * g->emc3d_ratio.
 *
 * The destination field is a u32, so the input is parsed with kstrtou32():
 * a value that does not fit in 32 bits is rejected instead of being
 * silently truncated on builds where unsigned long is wider than u32.
 *
 * Returns @count on success, -EINVAL if @buf is not a valid u32.
 */
static ssize_t emc3d_ratio_store(struct device *device,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct platform_device *ndev = to_platform_device(device);
	struct gk20a *g = get_gk20a(ndev);
	u32 val = 0;

	if (kstrtou32(buf, 10, &val) < 0)
		return -EINVAL;

	g->emc3d_ratio = val;

	return count;
}
/*
 * emc3d_ratio_read() - sysfs read handler for the emc3d_ratio attribute.
 *
 * Writes the current value of g->emc3d_ratio to @buf as a decimal number
 * followed by a newline. The field is a u32, so it is formatted with %u;
 * %d would print values above INT_MAX as negative numbers.
 *
 * Returns the number of characters written to @buf.
 */
static ssize_t emc3d_ratio_read(struct device *device,
	struct device_attribute *attr, char *buf)
{
	struct platform_device *ndev = to_platform_device(device);
	struct gk20a *g = get_gk20a(ndev);

	return sprintf(buf, "%u\n", g->emc3d_ratio);
}
static DEVICE_ATTR(emc3d_ratio, ROOTRW, emc3d_ratio_read, emc3d_ratio_store);
#ifdef CONFIG_PM_RUNTIME
static ssize_t force_idle_store(struct device *device,
struct device_attribute *attr, const char *buf, size_t count)
@@ -566,6 +592,7 @@ void gk20a_remove_sysfs(struct device *dev)
device_remove_file(dev, &dev_attr_slcg_enable);
device_remove_file(dev, &dev_attr_ptimer_scale_factor);
device_remove_file(dev, &dev_attr_elpg_enable);
device_remove_file(dev, &dev_attr_emc3d_ratio);
device_remove_file(dev, &dev_attr_counters);
device_remove_file(dev, &dev_attr_counters_reset);
device_remove_file(dev, &dev_attr_load);
@@ -593,6 +620,7 @@ void gk20a_create_sysfs(struct platform_device *dev)
error |= device_create_file(&dev->dev, &dev_attr_slcg_enable);
error |= device_create_file(&dev->dev, &dev_attr_ptimer_scale_factor);
error |= device_create_file(&dev->dev, &dev_attr_elpg_enable);
error |= device_create_file(&dev->dev, &dev_attr_emc3d_ratio);
error |= device_create_file(&dev->dev, &dev_attr_counters);
error |= device_create_file(&dev->dev, &dev_attr_counters_reset);
error |= device_create_file(&dev->dev, &dev_attr_load);

View File

@@ -39,16 +39,15 @@
#define TEGRA_GK20A_SIM_BASE 0x538F0000 /*tbd: get from iomap.h */
#define TEGRA_GK20A_SIM_SIZE 0x1000 /*tbd: this is a high-side guess */
#define TEGRA_GK20A_BW_PER_FREQ 32
#define TEGRA_GM20B_BW_PER_FREQ 64
#define TEGRA_DDR3_BW_PER_FREQ 16
extern struct device tegra_vpr_dev;
struct gk20a_platform t132_gk20a_tegra_platform;
struct gk20a_emc_params {
long emc_slope;
long emc_offset;
long emc_dip_slope;
long emc_dip_offset;
long emc_xmid;
bool linear;
long bw_ratio;
};
/*
@@ -189,20 +188,17 @@ fail:
* This function returns the minimum emc clock based on gpu frequency
*/
long gk20a_tegra_get_emc_rate(struct gk20a_emc_params *emc_params, long freq)
long gk20a_tegra_get_emc_rate(struct gk20a *g,
struct gk20a_emc_params *emc_params, long freq)
{
long hz;
freq = INT_TO_FX(HZ_TO_MHZ(freq));
hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
freq = HZ_TO_MHZ(freq);
hz -= FXMUL(emc_params->emc_dip_slope,
FXMUL(freq - emc_params->emc_xmid,
freq - emc_params->emc_xmid)) +
emc_params->emc_dip_offset;
hz = (freq * emc_params->bw_ratio);
hz = (hz * min(g->pmu.load_avg, g->emc3d_ratio)) / 1000;
hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
hz = (hz < 0) ? 0 : hz;
hz = MHZ_TO_HZ(hz);
return hz;
}
@@ -222,7 +218,7 @@ static void gk20a_tegra_postscale(struct platform_device *pdev,
struct gk20a *g = get_gk20a(pdev);
long after = gk20a_clk_get_rate(g);
long emc_target = gk20a_tegra_get_emc_rate(emc_params, after);
long emc_target = gk20a_tegra_get_emc_rate(g, emc_params, after);
clk_set_rate(platform->clk[2], emc_target);
}
@@ -245,94 +241,34 @@ static void gk20a_tegra_prescale(struct platform_device *pdev)
/*
* gk20a_tegra_calibrate_emc()
*
* Compute emc scaling parameters
*
* Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
*
* Remc - 3d.emc rate
* R3d - 3d.cbus rate
* Rm - 3d.cbus 'middle' rate = (max + min)/2
* S - emc_slope
* O - emc_offset
* Sd - emc_dip_slope
* Od - emc_dip_offset
*
* this superposes a quadratic dip centered around the middle 3d
* frequency over a linear correlation of 3d.emc to 3d clock
* rates.
*
* S, O are chosen so that the maximum 3d rate produces the
* maximum 3d.emc rate exactly, and the minimum 3d rate produces
* at least the minimum 3d.emc rate.
*
* Sd and Od are chosen to produce the largest dip that will
* keep 3d.emc frequencies monotonously decreasing with 3d
* frequencies. To achieve this, the first derivative of Remc
* with respect to R3d should be zero for the minimal 3d rate:
*
* R'emc = S - 2 * Sd * (R3d - Rm)
* R'emc(R3d-min) = 0
* S = 2 * Sd * (R3d-min - Rm)
* = 2 * Sd * (R3d-min - R3d-max) / 2
*
* +------------------------------+
* | Sd = S / (R3d-min - R3d-max) |
* +------------------------------+
*
* dip = Sd * (R3d - Rm)^2 + Od
*
* requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
*
* Sd * (R3d-min - Rm)^2 + Od = 0
* Od = -Sd * ((R3d-min - R3d-max) / 2)^2
* = -Sd * ((R3d-min - R3d-max)^2) / 4
*
* +------------------------------+
* | Od = (emc-max - emc-min) / 4 |
* +------------------------------+
*
*/
void gk20a_tegra_calibrate_emc(struct gk20a_emc_params *emc_params,
struct clk *clk_3d, struct clk *clk_3d_emc)
void gk20a_tegra_calibrate_emc(struct platform_device *pdev,
struct gk20a_emc_params *emc_params)
{
long correction;
unsigned long max_emc;
unsigned long min_emc;
unsigned long min_rate_3d;
unsigned long max_rate_3d;
struct gk20a *g = get_gk20a(pdev);
long gpu_bw, emc_bw;
max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
/* Detect and store gpu bw */
u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
switch (ver) {
case GK20A_GPUID_GK20A:
gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
break;
case GK20A_GPUID_GM20B:
gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
break;
default:
gpu_bw = 0;
break;
}
min_emc = clk_round_rate(clk_3d_emc, 0);
min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
/* TODO detect DDR3 vs DDR4 */
emc_bw = TEGRA_DDR3_BW_PER_FREQ;
max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
min_rate_3d = clk_round_rate(clk_3d, 0);
min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
emc_params->emc_slope =
FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
emc_params->emc_offset = max_emc -
FXMUL(emc_params->emc_slope, max_rate_3d);
/* Guarantee max 3d rate maps to max emc rate */
emc_params->emc_offset += max_emc -
(FXMUL(emc_params->emc_slope, max_rate_3d) +
emc_params->emc_offset);
emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
emc_params->emc_dip_slope =
-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
correction =
emc_params->emc_dip_offset +
FXMUL(emc_params->emc_dip_slope,
FXMUL(max_rate_3d - emc_params->emc_xmid,
max_rate_3d - emc_params->emc_xmid));
emc_params->emc_dip_offset -= correction;
/* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
* NOTE the ratio must come out as an integer */
emc_params->bw_ratio = (gpu_bw / emc_bw);
}
/*
@@ -427,7 +363,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
{
struct gk20a_platform *platform = gk20a_get_platform(pdev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params;
struct gk20a_emc_params *emc_params;
if (!profile)
return;
@@ -436,8 +372,7 @@ static void gk20a_tegra_scale_init(struct platform_device *pdev)
if (!emc_params)
return;
gk20a_tegra_calibrate_emc(emc_params, gk20a_clk_get(platform->g),
platform->clk[2]);
gk20a_tegra_calibrate_emc(pdev, emc_params);
profile->private_data = emc_params;
}

View File

@@ -3688,6 +3688,7 @@ int gk20a_pmu_load_update(struct gk20a *g)
pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
pmu->load_shadow = _load / 10;
pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
return 0;
}

View File

@@ -1080,6 +1080,7 @@ struct pmu_gk20a {
u32 sample_buffer;
u32 load_shadow;
u32 load_avg;
struct mutex isr_mutex;
bool isr_enabled;