From 601567ef37970d2562c5bccaf8355fa240da9a01 Mon Sep 17 00:00:00 2001 From: Johnny Liu Date: Thu, 3 Oct 2024 12:36:17 +0000 Subject: [PATCH] devfreq: nvhost_podgov: implementation cleanup The previous implementation lacks documentation and it's hard to understand the effect of changing a specific tunable parameter. Revise the implementation so that the scaling behavior is roughly aligned with the tegra_wmark governor supported by the actmon. The only difference is that the nvhost_podgov governor is a polling-based DFS algorithm and it maintains the moving average load in the governor instead of in the hardware itself. Bug 4892068 Change-Id: I033cb1359a484d4c9433fa4f2e7a99c42cb636b3 Signed-off-by: Johnny Liu Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3315446 GVS: buildbot_gerritrpt Reviewed-by: Rajesh Devaraj --- drivers/devfreq/governor_pod_scaling.c | 1003 ++++++------------------ include/linux/devfreq/nvhost_podgov.h | 10 + include/trace/events/nvhost_podgov.h | 250 ------ 3 files changed, 229 insertions(+), 1034 deletions(-) create mode 100644 include/linux/devfreq/nvhost_podgov.h delete mode 100644 include/trace/events/nvhost_podgov.h diff --git a/drivers/devfreq/governor_pod_scaling.c b/drivers/devfreq/governor_pod_scaling.c index 077c8b95..51ed9525 100644 --- a/drivers/devfreq/governor_pod_scaling.c +++ b/drivers/devfreq/governor_pod_scaling.c @@ -1,877 +1,315 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - */ - -/* - * Power-on-demand clock scaling for nvhost devices - * - * devfreq calls nvhost_pod_estimate_freq() for estimating the new - * frequency for the device. The clocking is done using the load of the device - * is estimated using the busy times from the device profile. This information - * indicates if the device frequency should be altered. - * + * SPDX-FileCopyrightText: Copyright (c) 2012-2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. */ #include #include -#include -#include #include +#include #include #include -#include #include #include #include #include -#define CREATE_TRACE_POINTS -#include #include -#define GET_TARGET_FREQ_DONTSCALE 1 -#ifdef CONFIG_DEVFREQ_GOV_POD_SCALING_HISTORY_BUFFER_SIZE_MAX -#define MAX_HISTORY_BUF_SIZE \ - CONFIG_DEVFREQ_GOV_POD_SCALING_HISTORY_BUFFER_SIZE_MAX -#else -#define MAX_HISTORY_BUF_SIZE 0 -#endif +#define DEFINE_ATTR_LOAD_STORE(name, limit) \ + static ssize_t name##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, \ + size_t count) \ + { \ + struct devfreq *df = to_devfreq(dev); \ + struct podgov_data *podgov; \ + unsigned int name; \ + int ret; \ + \ + ret = kstrtouint(buf, 0, &name); \ + if (ret) \ + return ret; \ + \ + name = min_t(unsigned int, name, limit); \ + \ + mutex_lock(&df->lock); \ + podgov = df->governor_data; \ + podgov->name = name; \ + mutex_unlock(&df->lock); \ + \ + return count; \ + } \ + static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct devfreq *df = to_devfreq(dev); \ + struct podgov_data *podgov; \ + int err; \ + \ + mutex_lock(&df->lock); \ + podgov = df->governor_data; \ + err = sprintf(buf, "%u\n", podgov->name); \ + mutex_unlock(&df->lock); \ + \ + return err; \ + } \ + static DEVICE_ATTR_RW(name) -static void podgov_enable(struct devfreq *df, int enable); -static void podgov_set_user_ctl(struct devfreq *df, int enable); -static struct devfreq_governor nvhost_podgov; +/** + * struct podgov_data - governor private data stored in struct devfreq + * @load_target: Frequency scaling logic will try to keep the device + * running at the specified load with specific frequency. + * The valid value of 'load_target' ranges from 0 to 1000. + * @load_margin: Margin value associated with the 'load_target' to determine + * the load threshold to scale down the device frequency. 
If + * load_target equals 700 and load_margin equals 100, + * then the governor will scale down the frequency when the + * average load of the device is below 600 (700 - 100). The valid value of + * 'load_margin' ranges from 0 to 1000. + * @load_max: Whenever the instantaneous load value reaches or exceeds + * this value, the governor will try to scale the device to whatever + * maximum frequency it can achieve. The valid value of + * 'load_max' ranges from 0 to 1000. + * @down_freq_margin: Number of frequency steps for scaling down the frequency + * when the moving-average load value is below (load_target - load_margin). + * @up_freq_margin: Number of frequency steps for scaling up the frequency + * when the moving-average load value exceeds the load_target. + * @k: The moving-average weight factor for the average load. + * The average load of the device is calculated as follows: + * $$ + * avg_load = (avg_load * (2**k - 1) + load) / 2**k + * $$ + * @avg_load: Moving-average load value tracked by the governor. + * @freq_index: Index value of current frequency in the frequency table. + * @df: The devfreq device associated with the governor. + * @nb: Notifier block for DEVFREQ_TRANSITION_NOTIFIER list. 
+ */ +struct podgov_data { + /* Tunable parameters */ + unsigned int load_target; + unsigned int load_margin; + unsigned int load_max; + unsigned int down_freq_margin; + unsigned int up_freq_margin; + unsigned int k; -/******************************************************************************* - * podgov_info_rec - gr3d scaling governor specific parameters - ******************************************************************************/ - -struct podgov_info_rec { - int enable; - int suspended; - int init; - - ktime_t last_scale; - - unsigned int p_block_window; - unsigned int p_smooth; - int p_damp; - int p_load_max; - int p_load_target; - int p_bias; - unsigned int p_user; - unsigned int p_freq_request; - - unsigned long cycles_norm; - unsigned long cycles_avg; - - unsigned long *cycles_history_buf; - int p_history_buf_size; - int history_next; - int history_count; - unsigned long recent_high; - - unsigned long rt_load; - - int adjustment_type; - unsigned long adjustment_frequency; - - struct devfreq *power_manager; - struct dentry *debugdir; - - unsigned long *freqlist; - int freq_count; - - int freq_avg; - - struct kobj_attribute enable_3d_scaling_attr; - struct kobj_attribute user_attr; - struct kobj_attribute freq_request_attr; - - struct mutex lock; + /* Private fields */ + unsigned int avg_load; + int freq_index; + struct devfreq *df; + struct notifier_block nb; }; -/******************************************************************************* - * Adjustment type is used to tell the source that requested frequency re- - * estimation. Type ADJUSTMENT_LOCAL indicates that the re-estimation was - * initiated by the governor itself. This happens when one of the worker - * threads want to adjust the frequency. - * - * ADJUSTMENT_DEVICE_REQ (default value) indicates that the adjustment was - * initiated by a device event. 
- ******************************************************************************/ +DEFINE_ATTR_LOAD_STORE(load_target, 1000); +DEFINE_ATTR_LOAD_STORE(load_margin, 1000); +DEFINE_ATTR_LOAD_STORE(load_max, 1000); +DEFINE_ATTR_LOAD_STORE(down_freq_margin, 10); +DEFINE_ATTR_LOAD_STORE(up_freq_margin, 10); +DEFINE_ATTR_LOAD_STORE(k, 10); -enum podgov_adjustment_type { - ADJUSTMENT_LOCAL = 0, - ADJUSTMENT_DEVICE_REQ = 1 +static struct attribute *dev_entries[] = { + &dev_attr_load_target.attr, + &dev_attr_load_margin.attr, + &dev_attr_load_max.attr, + &dev_attr_down_freq_margin.attr, + &dev_attr_up_freq_margin.attr, + &dev_attr_k.attr, + NULL, }; -#define HZ_PER_KHZ 1000 +static struct attribute_group dev_attr_group = { + .name = DEVFREQ_GOV_NVHOST_PODGOV, + .attrs = dev_entries, +}; -static void get_freq_range(struct devfreq *devfreq, - unsigned long *min_freq, - unsigned long *max_freq) +static int devfreq_get_freq_index(struct devfreq *df, unsigned long freq) { #if defined(NV_DEVFREQ_HAS_FREQ_TABLE) - unsigned long *freq_table = devfreq->freq_table; - unsigned int max_state = devfreq->max_state; + unsigned long *freq_table = df->freq_table; + unsigned int max_state = df->max_state; #else - unsigned long *freq_table = devfreq->profile->freq_table; - unsigned int max_state = devfreq->profile->max_state; + unsigned long *freq_table = df->profile->freq_table; + unsigned int max_state = df->profile->max_state; #endif + int i; - lockdep_assert_held(&devfreq->lock); - - if (freq_table[0] < freq_table[max_state - 1]) { - *min_freq = freq_table[0]; - *max_freq = freq_table[max_state - 1]; - } else { - *min_freq = freq_table[max_state - 1]; - *max_freq = freq_table[0]; - } - - /* Apply constraints from OPP interface */ - *min_freq = max(*min_freq, devfreq->scaling_min_freq); - *max_freq = min(*max_freq, devfreq->scaling_max_freq); - - if (*min_freq > *max_freq) - *min_freq = *max_freq; -} - -static void get_min_freq_limit(struct devfreq *df, unsigned long *min_freq_hz) -{ - 
unsigned long max_freq_hz; - - get_freq_range(df, min_freq_hz, &max_freq_hz); -} - -static void get_max_freq_limit(struct devfreq *df, unsigned long *max_freq_hz) -{ - unsigned long min_freq_hz; - - get_freq_range(df, &min_freq_hz, max_freq_hz); -} - -/******************************************************************************* - * scaling_limit(df, freq) - * - * Limit the given frequency - ******************************************************************************/ - -static void scaling_limit(struct devfreq *df, unsigned long *freq) -{ - unsigned long min_freq_hz = 0; - unsigned long max_freq_hz = 0; - - get_min_freq_limit(df, &min_freq_hz); - get_max_freq_limit(df, &max_freq_hz); - - if (*freq < min_freq_hz) - *freq = min_freq_hz; - else if (*freq > max_freq_hz) - *freq = max_freq_hz; -} - -/******************************************************************************* - * podgov_enable(dev, enable) - * - * This function enables (enable=1) or disables (enable=0) the automatic scaling - * of the device. If the device is disabled, the device's clock is set to its - * maximum. 
- ******************************************************************************/ - -static void podgov_enable(struct devfreq *df, int enable) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov = df->data; - bool polling; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&podgov->lock); - mutex_lock(&df->lock); - - trace_podgov_enabled(df->dev.parent, enable); - - /* store the enable information */ - podgov->enable = enable; - - /* skip local adjustment if we are enabling or the device is - * suspended */ - if (!enable && pm_runtime_active(dev)) { - /* full speed */ - get_max_freq_limit(df, &podgov->adjustment_frequency); - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - polling = podgov->enable && !podgov->p_user; - - /* Need to unlock to call devfreq_monitor_suspend/resume() - * still holding podgov->lock to guarantee atomicity - */ - mutex_unlock(&df->lock); - - if (polling) - devfreq_monitor_resume(df); - else - devfreq_monitor_suspend(df); - - mutex_unlock(&podgov->lock); - pm_runtime_put(dev); -} - -/***************************************************************************** - * podgov_set_user_ctl(dev, user) - * - * This function enables or disables user control of the gpu. If user control - * is enabled, setting the freq_request controls the gpu frequency, and other - * gpu scaling mechanisms are disabled. 
- ******************************************************************************/ - -static void podgov_set_user_ctl(struct devfreq *df, int user) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov = df->data; - int old_user; - bool polling; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&podgov->lock); - mutex_lock(&df->lock); - - trace_podgov_set_user_ctl(df->dev.parent, user); - - /* store the new user value */ - old_user = podgov->p_user; - podgov->p_user = user; - - /* skip scaling, if scaling (or the whole device) is turned off - * - or the scaling already was in user mode */ - if (pm_runtime_active(dev) && podgov->enable && user && !old_user) { - /* write request */ - podgov->adjustment_frequency = podgov->p_freq_request; - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - polling = podgov->enable && !podgov->p_user; - - /* Need to unlock to call devfreq_monitor_suspend/resume() - * still holding podgov->lock to guarantee atomicity - */ - mutex_unlock(&df->lock); - - if (polling) - devfreq_monitor_resume(df); - else - devfreq_monitor_suspend(df); - - mutex_unlock(&podgov->lock); - pm_runtime_put(dev); -} - -/***************************************************************************** - * podgov_set_freq_request(dev, user) - * - * Set the current freq request. If scaling is enabled, and podgov user space - * control is enabled, this will set the gpu frequency. 
- ******************************************************************************/ - -static void podgov_set_freq_request(struct devfreq *df, int freq_request) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&df->lock); - - podgov = df->data; - - trace_podgov_set_freq_request(df->dev.parent, freq_request); - - podgov->p_freq_request = freq_request; - - /* update the request only if podgov is enabled, device is turned on - * and the scaling is in user mode */ - if (podgov->enable && podgov->p_user && - pm_runtime_active(dev)) { - podgov->adjustment_frequency = freq_request; - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - mutex_unlock(&df->lock); - pm_runtime_put(dev); -} - - -/******************************************************************************* - * freq = scaling_state_check(df, time) - * - * This handler is called to adjust the frequency of the device. The function - * returns the desired frequency for the clock. If there is no need to tune the - * clock immediately, 0 is returned. 
- ******************************************************************************/ - -static unsigned long scaling_state_check(struct devfreq *df, ktime_t time) -{ - struct podgov_info_rec *pg = df->data; - struct devfreq_dev_status *ds = &df->last_status; - unsigned long dt, busyness, rt_load = pg->rt_load; - long max_boost, damp, freq, boost, res; - unsigned long max_freq_hz = 0; - - dt = (unsigned long) ktime_us_delta(time, pg->last_scale); - if (dt < pg->p_block_window || df->previous_freq == 0) - return 0; - - /* convert to mhz to avoid overflow */ - freq = df->previous_freq / 1000000; - get_max_freq_limit(df, &max_freq_hz); - max_boost = ((max_freq_hz / 3) / 1000000); - - /* calculate and trace load */ - busyness = 1000ULL * pg->cycles_avg / ds->current_frequency; - - /* consider recent high load if required */ - if (pg->p_history_buf_size && pg->history_count) - busyness = 1000ULL * pg->recent_high / ds->current_frequency; - - trace_podgov_load(df->dev.parent, rt_load); - trace_podgov_busy(df->dev.parent, busyness); - - damp = pg->p_damp; - - if (rt_load > pg->p_load_max) { - /* if too busy, scale up max/3, do not damp */ - boost = max_boost; - damp = 10; - } else { - /* boost = bias * freq * (busyness - target)/target */ - boost = busyness - pg->p_load_target; - boost *= (pg->p_bias * freq); - boost /= (100 * pg->p_load_target); - - /* clamp to max boost */ - boost = (boost < max_boost) ? 
boost : max_boost; - } - - /* calculate new request */ - res = freq + boost; - - /* Maintain average request */ - pg->freq_avg = (pg->freq_avg * pg->p_smooth) + res; - pg->freq_avg /= (pg->p_smooth+1); - - /* Applying damping to frequencies */ - res = ((damp * res) + ((10 - damp)*pg->freq_avg)) / 10; - - /* Convert to hz, limit, and apply */ - res = res * 1000000; - scaling_limit(df, &res); - trace_podgov_scaling_state_check(df->dev.parent, - df->previous_freq, res); - return res; -} - -/******************************************************************************* - * freqlist_up(podgov, target, steps) - * - * This function determines the frequency that is "steps" frequency steps - * higher compared to the target frequency. - ******************************************************************************/ - -static int freqlist_up(struct podgov_info_rec *podgov, unsigned long target, - int steps) -{ - int i, pos; - - for (i = 0; i < podgov->freq_count; i++) - if (podgov->freqlist[i] >= target) + for (i = 0; i < max_state; i++) { + if (freq_table[i] >= freq) break; - - pos = min(podgov->freq_count - 1, i + steps); - return podgov->freqlist[pos]; -} - -/******************************************************************************* - * debugfs interface for controlling 3d clock scaling on the fly - ******************************************************************************/ - -#ifdef CONFIG_DEBUG_FS - -static void nvhost_scale_emc_debug_init(struct devfreq *df) -{ - struct podgov_info_rec *podgov = df->data; - char dirname[128]; - int err; - - err = snprintf(dirname, sizeof(dirname), "%s_scaling", - to_platform_device(df->dev.parent)->name); - WARN_ON(err < 0); - - if (!podgov) - return; - - podgov->debugdir = debugfs_create_dir(dirname, NULL); - if (!podgov->debugdir) { - pr_err("podgov: can\'t create debugfs directory\n"); - return; } -#define CREATE_PODGOV_FILE(fname) \ - do {\ - debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, \ - podgov->debugdir, 
&podgov->p_##fname); \ - } while (0) - - CREATE_PODGOV_FILE(block_window); - CREATE_PODGOV_FILE(load_max); - CREATE_PODGOV_FILE(load_target); - CREATE_PODGOV_FILE(bias); - CREATE_PODGOV_FILE(damp); - CREATE_PODGOV_FILE(smooth); -#undef CREATE_PODGOV_FILE + return i; } -static void nvhost_scale_emc_debug_deinit(struct devfreq *df) +static int devfreq_notifier_call(struct notifier_block *nb, unsigned long event, void *ptr) { - struct podgov_info_rec *podgov = df->data; + struct podgov_data *podgov = container_of(nb, struct podgov_data, nb); + struct devfreq_freqs *freqs = ptr; - debugfs_remove_recursive(podgov->debugdir); + switch (event) { + case DEVFREQ_POSTCHANGE: + podgov->freq_index = devfreq_get_freq_index(podgov->df, freqs->new); + break; + default: + break; + }; + + return NOTIFY_DONE; } -#else -static void nvhost_scale_emc_debug_init(struct devfreq *df) +static int nvhost_pod_target_freq(struct devfreq *df, unsigned long *freq) { - (void)df; -} - -static void nvhost_scale_emc_debug_deinit(struct devfreq *df) -{ - (void)df; -} -#endif - -/******************************************************************************* - * sysfs interface for enabling/disabling 3d scaling - ******************************************************************************/ - -static ssize_t enable_3d_scaling_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = container_of(attr, - struct podgov_info_rec, - enable_3d_scaling_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->enable); - WARN_ON(res < 0); - - return res; -} - -static ssize_t enable_3d_scaling_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = container_of(attr, - struct podgov_info_rec, - enable_3d_scaling_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_enable(podgov->power_manager, val); - - return count; -} - 
-/******************************************************************************* - * sysfs interface for user space control - * user = [0,1] disables / enabled user space control - * freq_request is the sysfs node user space writes frequency requests to - ******************************************************************************/ - -static ssize_t user_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, user_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->p_user); - WARN_ON(res < 0); - - return res; -} - -static ssize_t user_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, user_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_set_user_ctl(podgov->power_manager, val); - - return count; -} - -static ssize_t freq_request_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, freq_request_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->p_freq_request); - WARN_ON(res < 0); - - return res; -} - -static ssize_t freq_request_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, freq_request_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_set_freq_request(podgov->power_manager, val); - - return count; -} - -/******************************************************************************* - * nvhost_pod_estimate_freq(df, freq) - * - * This function is called for re-estimating the frequency. 
The function is - * called in three conditions: - * - * (1) Internal request to change the frequency. In this case a new clock - * target is immediately set for the device. - * (2) Call from the client (something has happened and re-estimation - * is required). - * (3) Some other reason (i.e. periodic call) - * - ******************************************************************************/ - -static int nvhost_pod_estimate_freq(struct devfreq *df, - unsigned long *freq) -{ - struct podgov_info_rec *pg = df->data; - struct devfreq_dev_status *ds; - int err, i; - int buf_size = pg->p_history_buf_size; - int buf_next = pg->history_next; - int buf_count = pg->history_count; - unsigned long *cycles_buffer = pg->cycles_history_buf; - ktime_t now; - unsigned long long norm_load; - - /* If the device is suspended, clear the history and set frequency to - * min freq. - */ - if (pg->suspended) { - *freq = DEVFREQ_MIN_FREQ; - pg->last_scale = ktime_get(); - i = 0; - for (; i < MAX_HISTORY_BUF_SIZE; i++) - pg->cycles_history_buf[i] = 0; - pg->history_count = 0; - pg->history_next = 0; - pg->recent_high = 0; - pg->freq_avg = 0; - return 0; - } - - /* Ensure maximal clock when scaling is disabled */ - if (!pg->enable) { - *freq = DEVFREQ_MAX_FREQ; - if (*freq == df->previous_freq) - return GET_TARGET_FREQ_DONTSCALE; - else - return 0; - } - - if (pg->p_user) { - *freq = pg->p_freq_request; - return 0; - } + struct podgov_data *podgov = df->governor_data; + struct devfreq_dev_status *status; + unsigned long load, weight; + unsigned int down_threshold; + int index, err; err = devfreq_update_stats(df); if (err) return err; - ds = &df->last_status; + status = &df->last_status; - if (ds->total_time == 0) { - *freq = ds->current_frequency; + /* Update two types of loads */ + load = status->busy_time * 1000 / status->total_time; + weight = 1 << podgov->k; + podgov->avg_load = (podgov->avg_load * (weight - 1) + load) / weight; + + /* Scale up to maximum frequency to respond transient 
peak workload */ + if (load >= podgov->load_max) { + *freq = ULONG_MAX; return 0; } - now = ktime_get(); + /* Scale up current frequency by number of steps */ + if (podgov->avg_load > podgov->load_target) { + index = podgov->freq_index + podgov->up_freq_margin; - /* Local adjustments (i.e. requests from kernel threads) are - * handled here */ +#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) + if (index < 0) + index = df->max_state - 1; + else + index = min(index, (int)df->max_state - 1); - if (pg->adjustment_type == ADJUSTMENT_LOCAL) { + *freq = df->freq_table[index]; +#else + if (index < 0) + index = df->profile->max_state - 1; + else + index = min(index, (int)df->profile->max_state - 1); - pg->adjustment_type = ADJUSTMENT_DEVICE_REQ; - - /* Do not do unnecessary scaling */ - scaling_limit(df, &pg->adjustment_frequency); - - trace_podgov_estimate_freq(df->dev.parent, - df->previous_freq, - pg->adjustment_frequency); - - *freq = pg->adjustment_frequency; + *freq = df->profile->freq_table[index]; +#endif return 0; } - /* Sustain local variables */ - norm_load = (u64)ds->current_frequency * ds->busy_time / ds->total_time; - pg->cycles_norm = norm_load; - pg->cycles_avg = ((u64)pg->cycles_avg * pg->p_smooth + norm_load) / - (pg->p_smooth + 1); - pg->rt_load = 1000ULL * ds->busy_time / ds->total_time; - - /* Update history of normalized cycle counts and recent highest count */ - if (buf_size) { - if (buf_count == buf_size) { - pg->recent_high = 0; - i = (buf_next + 1) % buf_size; - for (; i != buf_next; i = (i + 1) % buf_size) { - if (cycles_buffer[i] > pg->recent_high) - pg->recent_high = cycles_buffer[i]; - } - } - cycles_buffer[buf_next] = norm_load; - pg->history_next = (buf_next + 1) % buf_size; - if (buf_count < buf_size) - pg->history_count += 1; - if (norm_load > pg->recent_high) - pg->recent_high = norm_load; + /* Scale down current frequency by number of steps */ + if (podgov->load_margin < podgov->load_target) { + down_threshold = podgov->load_target - 
podgov->load_margin; + } else { + down_threshold = 0; } - *freq = scaling_state_check(df, now); - - if (!(*freq)) { - *freq = ds->current_frequency; + if (podgov->avg_load < down_threshold) { + index = podgov->freq_index - podgov->down_freq_margin; + index = max(index, 0); +#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) + *freq = df->freq_table[index]; +#else + *freq = df->profile->freq_table[index]; +#endif return 0; } - if ((*freq = freqlist_up(pg, *freq, 0)) == ds->current_frequency) - return 0; - - pg->last_scale = now; - - trace_podgov_estimate_freq(df->dev.parent, df->previous_freq, *freq); - - + /* Stay with the same frequency */ + *freq = status->current_frequency; return 0; } -/******************************************************************************* - * nvhost_pod_init(struct devfreq *df) - * - * Governor initialisation. - ******************************************************************************/ - static int nvhost_pod_init(struct devfreq *df) { - struct podgov_info_rec *podgov; - struct platform_device *d = to_platform_device(df->dev.parent); - ktime_t now = ktime_get(); + struct podgov_data *podgov; + struct devfreq_dev_status *status; + int err; - struct kobj_attribute *attr = NULL; + err = devfreq_update_stats(df); + if (err) + return err; - podgov = kzalloc(sizeof(struct podgov_info_rec), GFP_KERNEL); + status = &df->last_status; + + podgov = kzalloc(sizeof(*podgov), GFP_KERNEL); if (!podgov) - goto err_alloc_podgov; + return -ENOMEM; - podgov->cycles_history_buf = - kzalloc(sizeof(unsigned long) * MAX_HISTORY_BUF_SIZE, - GFP_KERNEL); - if (!podgov->cycles_history_buf) - goto err_alloc_history_buffer; + df->governor_data = (void *)podgov; - podgov->p_history_buf_size = - MAX_HISTORY_BUF_SIZE < 100 ? 
MAX_HISTORY_BUF_SIZE : 100; - podgov->history_count = 0; - podgov->history_next = 0; - podgov->recent_high = 0; + /* Set default scaling parameters */ + podgov->load_target = 700; + podgov->load_margin = 100; + podgov->load_max = 900; + podgov->down_freq_margin = 1; + podgov->up_freq_margin = 4; + podgov->k = 3; - df->data = (void *)podgov; + /* Reset private data */ + podgov->avg_load = 0; + podgov->freq_index = devfreq_get_freq_index(df, status->current_frequency); + podgov->df = df; - /* Set scaling parameter defaults */ - podgov->enable = 1; - podgov->suspended = 0; + podgov->nb.notifier_call = devfreq_notifier_call; + err = devfreq_register_notifier(df, &podgov->nb, DEVFREQ_TRANSITION_NOTIFIER); + if (err) + goto free_data; - podgov->p_load_max = 900; - podgov->p_load_target = 700; - podgov->p_bias = 80; - podgov->p_smooth = 10; - podgov->p_damp = 7; - podgov->p_block_window = 50000; + /* Expose tunable params under devfreq sysfs */ + err = sysfs_create_group(&df->dev.kobj, &dev_attr_group); + if (err) + goto unregister_notifier; - podgov->adjustment_type = ADJUSTMENT_DEVICE_REQ; - podgov->p_user = 0; - - /* Reset clock counters */ - podgov->last_scale = now; - - podgov->power_manager = df; - - mutex_init(&podgov->lock); - - attr = &podgov->enable_3d_scaling_attr; - attr->attr.name = "enable_3d_scaling"; - attr->attr.mode = S_IWUSR | S_IRUGO; - attr->show = enable_3d_scaling_show; - attr->store = enable_3d_scaling_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_enable_sysfs_entry; - - attr = &podgov->freq_request_attr; - attr->attr.name = "freq_request"; - attr->attr.mode = S_IWUSR | S_IRUGO; - attr->show = freq_request_show; - attr->store = freq_request_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_request_sysfs_entry; - - attr = &podgov->user_attr; - attr->attr.name = "user"; - attr->attr.mode = S_IWUSR | S_IRUGO; - 
attr->show = user_show; - attr->store = user_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_user_sysfs_entry; - -#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) - podgov->freq_count = df->max_state; - podgov->freqlist = df->freq_table; -#else - podgov->freq_count = df->profile->max_state; - podgov->freqlist = df->profile->freq_table; -#endif - if (!podgov->freq_count || !podgov->freqlist) - goto err_get_freqs; - - /* store the limits */ - podgov->p_freq_request = podgov->freqlist[podgov->freq_count - 1]; - - podgov->freq_avg = 0; - - nvhost_scale_emc_debug_init(df); - - devfreq_monitor_start(df); + if (!pm_runtime_suspended(df->dev.parent)) { + devfreq_monitor_start(df); + } return 0; -err_get_freqs: - sysfs_remove_file(&df->dev.parent->kobj, &podgov->user_attr.attr); -err_create_user_sysfs_entry: - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->freq_request_attr.attr); -err_create_request_sysfs_entry: - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->enable_3d_scaling_attr.attr); -err_create_enable_sysfs_entry: - dev_err(&d->dev, "failed to create sysfs attributes"); - kfree(podgov->cycles_history_buf); -err_alloc_history_buffer: - kfree(podgov); -err_alloc_podgov: - return -ENOMEM; +unregister_notifier: + devfreq_unregister_notifier(df, &podgov->nb, DEVFREQ_TRANSITION_NOTIFIER); +free_data: + kfree(df->governor_data); + df->governor_data = NULL; + return err; } -/******************************************************************************* - * nvhost_pod_exit(struct devfreq *df) - * - * Clean up governor data structures - ******************************************************************************/ - static void nvhost_pod_exit(struct devfreq *df) { - struct podgov_info_rec *podgov = df->data; + struct podgov_data *podgov = df->governor_data; devfreq_monitor_stop(df); + sysfs_remove_group(&df->dev.kobj, &dev_attr_group); + devfreq_unregister_notifier(df, &podgov->nb, 
DEVFREQ_TRANSITION_NOTIFIER); - sysfs_remove_file(&df->dev.parent->kobj, &podgov->user_attr.attr); - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->freq_request_attr.attr); - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->enable_3d_scaling_attr.attr); - - nvhost_scale_emc_debug_deinit(df); - kfree(podgov->cycles_history_buf); - kfree(podgov); + kfree(df->governor_data); + df->governor_data = NULL; } -/****************************************************************************** - * nvhost_pod_suspend(struct devfreq *df) - * - * Suspends the governor. - *****************************************************************************/ - static void nvhost_pod_suspend(struct devfreq *df) { - // Record suspension in our own data structure because we'll have to - // erase and restore devfreq's for this to work. - struct podgov_info_rec *pg = df->data; + struct podgov_data *podgov = df->governor_data; - pg->suspended = 1; - - // Update frequency for the final time before going into suspension. - mutex_lock(&df->lock); - update_devfreq(df); - mutex_unlock(&df->lock); + podgov->avg_load = 0; devfreq_monitor_suspend(df); } -/****************************************************************************** - * nvhost_pod_resume(struct devfreq *df) - * - * Resumes the governor. 
- *****************************************************************************/ - static void nvhost_pod_resume(struct devfreq *df) { - // Update our data structure's suspension field - struct podgov_info_rec *pg = df->data; - - pg->suspended = 0; - - // Resume devfreq_monitor_resume(df); } @@ -904,10 +342,9 @@ static int nvhost_pod_event_handler(struct devfreq *df, } static struct devfreq_governor nvhost_podgov = { - .name = "nvhost_podgov", - .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL - | DEVFREQ_GOV_ATTR_TIMER, - .get_target_freq = nvhost_pod_estimate_freq, + .name = DEVFREQ_GOV_NVHOST_PODGOV, + .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL | DEVFREQ_GOV_ATTR_TIMER, + .get_target_freq = nvhost_pod_target_freq, .event_handler = nvhost_pod_event_handler, }; @@ -916,14 +353,12 @@ static int __init podgov_init(void) { return devfreq_add_governor(&nvhost_podgov); } +subsys_initcall(podgov_init); static void __exit podgov_exit(void) { devfreq_remove_governor(&nvhost_podgov); return; } - -/* governor must be registered before initialising client devices */ -rootfs_initcall(podgov_init); module_exit(podgov_exit); MODULE_LICENSE("GPL"); diff --git a/include/linux/devfreq/nvhost_podgov.h b/include/linux/devfreq/nvhost_podgov.h new file mode 100644 index 00000000..fbc17df3 --- /dev/null +++ b/include/linux/devfreq/nvhost_podgov.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + */ + +#ifndef DEVFREQ_NVHOST_PODGOV_H +#define DEVFREQ_NVHOST_PODGOV_H + +#define DEVFREQ_GOV_NVHOST_PODGOV "nvhost_podgov" + +#endif /* DEVFREQ_NVHOST_PODGOV_H */ diff --git a/include/trace/events/nvhost_podgov.h b/include/trace/events/nvhost_podgov.h deleted file mode 100644 index 8c63d29b..00000000 --- a/include/trace/events/nvhost_podgov.h +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2023, NVIDIA Corporation. 
All rights reserved. - * - * Nvhost event logging to ftrace. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM nvhost_podgov - -#if !defined(_TRACE_NVHOST_PODGOV_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_NVHOST_PODGOV_H - -#include -#include -#include - -DECLARE_EVENT_CLASS(podgov_update_freq, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - - TP_ARGS(dev, old_freq, new_freq), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, old_freq) - __field(unsigned long, new_freq) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->old_freq = old_freq; - __entry->new_freq = new_freq; - ), - - TP_printk("name=%s, old_freq=%lu, new_freq=%lu", - dev_name(__entry->dev), __entry->old_freq, __entry->new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_do_scale, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_scaling_state_check, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_estimate_freq, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_clocks_handler, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -TRACE_EVENT(podgov_enabled, - TP_PROTO(struct device *dev, int enable), - - TP_ARGS(dev, enable), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, enable) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->enable = enable; - ), - - TP_printk("name=%s, scaling_enabled=%d", dev_name(__entry->dev), __entry->enable) -); - -TRACE_EVENT(podgov_set_user_ctl, - TP_PROTO(struct device *dev, int user_ctl), - - TP_ARGS(dev, user_ctl), - - TP_STRUCT__entry( - __field(struct 
device *, dev) - __field(int, user_ctl) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->user_ctl = user_ctl; - ), - - TP_printk("name=%s, userspace control=%d", dev_name(__entry->dev), __entry->user_ctl) -); - -TRACE_EVENT(podgov_set_freq_request, - TP_PROTO(struct device *dev, int freq_request), - - TP_ARGS(dev, freq_request), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, freq_request) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->freq_request = freq_request; - ), - - TP_printk("name=%s, freq_request=%d", dev_name(__entry->dev), __entry->freq_request) -); - -TRACE_EVENT(podgov_busy, - TP_PROTO(struct device *dev, unsigned long busyness), - - TP_ARGS(dev, busyness), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, busyness) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->busyness = busyness; - ), - - TP_printk("name=%s, busyness=%lu", dev_name(__entry->dev), __entry->busyness) -); - -TRACE_EVENT(podgov_hint, - TP_PROTO(struct device *dev, long idle_estimate, int hint), - - TP_ARGS(dev, idle_estimate, hint), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(long, idle_estimate) - __field(int, hint) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->idle_estimate = idle_estimate; - __entry->hint = hint; - ), - - TP_printk("podgov (%s): idle %ld, hint %d", dev_name(__entry->dev), - __entry->idle_estimate, __entry->hint) -); - -TRACE_EVENT(podgov_idle, - TP_PROTO(struct device *dev, unsigned long idleness), - - TP_ARGS(dev, idleness), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, idleness) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->idleness = idleness; - ), - - TP_printk("name=%s, idleness=%lu", dev_name(__entry->dev), __entry->idleness) -); - -TRACE_EVENT(podgov_load, - TP_PROTO(struct device *dev, unsigned long load), - - TP_ARGS(dev, load), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned 
long, load) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->load = load; - ), - - TP_printk("name=%s, load=%lu", dev_name(__entry->dev), __entry->load) -); - -TRACE_EVENT(podgov_print_target, - TP_PROTO(struct device *dev, long busy, int avg_busy, long curr, - long target, int hint, int avg_hint), - - TP_ARGS(dev, busy, avg_busy, curr, target, hint, avg_hint), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(long, busy) - __field(int, avg_busy) - __field(long, curr) - __field(long, target) - __field(int, hint) - __field(int, avg_hint) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->busy = busy; - __entry->avg_busy = avg_busy; - __entry->curr = curr; - __entry->target = target; - __entry->hint = hint; - __entry->avg_hint = avg_hint; - ), - - TP_printk("podgov (%s): busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n", - dev_name(__entry->dev), __entry->busy, __entry->avg_busy, __entry->curr, - __entry->target, __entry->hint, __entry->avg_hint) -); - -TRACE_EVENT(podgov_stats, - TP_PROTO(struct device *dev, int fast_up_count, int slow_down_count, - unsigned int idle_min, unsigned int idle_max), - - TP_ARGS(dev, fast_up_count, slow_down_count, idle_min, idle_max), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, fast_up_count) - __field(int, slow_down_count) - __field(unsigned int, idle_min) - __field(unsigned int, idle_max) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->fast_up_count = fast_up_count; - __entry->slow_down_count = slow_down_count; - __entry->idle_min = idle_min; - __entry->idle_max = idle_max; - ), - - TP_printk("podgov stats (%s): + %d - %d min %u max %u\n", - dev_name(__entry->dev), __entry->fast_up_count, - __entry->slow_down_count, __entry->idle_min, - __entry->idle_max) -); - -#endif /* _TRACE_NVHOST_PODGOV_H */ - -/* This part must be outside protection */ -#include