diff --git a/drivers/devfreq/governor_pod_scaling.c b/drivers/devfreq/governor_pod_scaling.c index 077c8b95..51ed9525 100644 --- a/drivers/devfreq/governor_pod_scaling.c +++ b/drivers/devfreq/governor_pod_scaling.c @@ -1,877 +1,315 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - */ - -/* - * Power-on-demand clock scaling for nvhost devices - * - * devfreq calls nvhost_pod_estimate_freq() for estimating the new - * frequency for the device. The clocking is done using the load of the device - * is estimated using the busy times from the device profile. This information - * indicates if the device frequency should be altered. - * + * SPDX-FileCopyrightText: Copyright (c) 2012-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include #include -#include -#include #include +#include #include #include -#include #include #include #include #include -#define CREATE_TRACE_POINTS -#include #include -#define GET_TARGET_FREQ_DONTSCALE 1 -#ifdef CONFIG_DEVFREQ_GOV_POD_SCALING_HISTORY_BUFFER_SIZE_MAX -#define MAX_HISTORY_BUF_SIZE \ - CONFIG_DEVFREQ_GOV_POD_SCALING_HISTORY_BUFFER_SIZE_MAX -#else -#define MAX_HISTORY_BUF_SIZE 0 -#endif +#define DEFINE_ATTR_LOAD_STORE(name, limit) \ + static ssize_t name##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, \ + size_t count) \ + { \ + struct devfreq *df = to_devfreq(dev); \ + struct podgov_data *podgov; \ + unsigned int name; \ + int ret; \ + \ + ret = kstrtouint(buf, 0, &name); \ + if (ret) \ + return ret; \ + \ + name = min_t(unsigned int, name, limit); \ + \ + mutex_lock(&df->lock); \ + podgov = df->governor_data; \ + podgov->name = name; \ + mutex_unlock(&df->lock); \ + \ + return count; \ + } \ + static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct devfreq *df = to_devfreq(dev); \ + struct podgov_data *podgov; \ + int 
err; \ + \ + mutex_lock(&df->lock); \ + podgov = df->governor_data; \ + err = sprintf(buf, "%u\n", podgov->name); \ + mutex_unlock(&df->lock); \ + \ + return err; \ + } \ + static DEVICE_ATTR_RW(name) -static void podgov_enable(struct devfreq *df, int enable); -static void podgov_set_user_ctl(struct devfreq *df, int enable); -static struct devfreq_governor nvhost_podgov; +/** + * struct podgov_data - governor private data stored in struct devfreq + * @load_target: Frequency scaling logic will try to keep the device + * running at the specified load with specific frequency. + * The valid value of 'load_target' ranges from 0 to 1000. + * @load_margin: Margin value associated with the 'load_target' to determine + * the load threshold to scale down the device frequency. If + * load_target equals 700 and load_margin equals 100, + * then the governor will scale down the frequency when load + * of device is below 600 (700 - 100). The valid value of + * 'load_margin' ranges from 0 to 1000. + * @load_max: Whenever the instantaneous load value reaches or exceeds this + * value, the governor will try to scale the device to whatever + * maximum frequency it can achieve. The valid value of + * 'load_max' ranges from 0 to 1000. + * @down_freq_margin: Number of frequency steps for scaling down the frequency + * when the moving-average load value is below (load_target - load_margin). + * @up_freq_margin: Number of frequency steps for scaling up the frequency + * when the moving-average load value is above the load_target. + * @k: The moving-average weight factor for the average load. + * The average load of the device is calculated as follows: + * $$ + * avg_load = (avg_load * (2**k - 1) + load) / 2**k + * $$ + * @avg_load: Moving-average load value tracked by the governor. + * @freq_index: Index value of current frequency in the frequency table. + * @df: The devfreq device associated with the governor. + * @nb: Notifier block for DEVFREQ_TRANSITION_NOTIFIER list. 
+ */ +struct podgov_data { + /* Tunable parameters */ + unsigned int load_target; + unsigned int load_margin; + unsigned int load_max; + unsigned int down_freq_margin; + unsigned int up_freq_margin; + unsigned int k; -/******************************************************************************* - * podgov_info_rec - gr3d scaling governor specific parameters - ******************************************************************************/ - -struct podgov_info_rec { - int enable; - int suspended; - int init; - - ktime_t last_scale; - - unsigned int p_block_window; - unsigned int p_smooth; - int p_damp; - int p_load_max; - int p_load_target; - int p_bias; - unsigned int p_user; - unsigned int p_freq_request; - - unsigned long cycles_norm; - unsigned long cycles_avg; - - unsigned long *cycles_history_buf; - int p_history_buf_size; - int history_next; - int history_count; - unsigned long recent_high; - - unsigned long rt_load; - - int adjustment_type; - unsigned long adjustment_frequency; - - struct devfreq *power_manager; - struct dentry *debugdir; - - unsigned long *freqlist; - int freq_count; - - int freq_avg; - - struct kobj_attribute enable_3d_scaling_attr; - struct kobj_attribute user_attr; - struct kobj_attribute freq_request_attr; - - struct mutex lock; + /* Private fields */ + unsigned int avg_load; + int freq_index; + struct devfreq *df; + struct notifier_block nb; }; -/******************************************************************************* - * Adjustment type is used to tell the source that requested frequency re- - * estimation. Type ADJUSTMENT_LOCAL indicates that the re-estimation was - * initiated by the governor itself. This happens when one of the worker - * threads want to adjust the frequency. - * - * ADJUSTMENT_DEVICE_REQ (default value) indicates that the adjustment was - * initiated by a device event. 
- ******************************************************************************/ +DEFINE_ATTR_LOAD_STORE(load_target, 1000); +DEFINE_ATTR_LOAD_STORE(load_margin, 1000); +DEFINE_ATTR_LOAD_STORE(load_max, 1000); +DEFINE_ATTR_LOAD_STORE(down_freq_margin, 10); +DEFINE_ATTR_LOAD_STORE(up_freq_margin, 10); +DEFINE_ATTR_LOAD_STORE(k, 10); -enum podgov_adjustment_type { - ADJUSTMENT_LOCAL = 0, - ADJUSTMENT_DEVICE_REQ = 1 +static struct attribute *dev_entries[] = { + &dev_attr_load_target.attr, + &dev_attr_load_margin.attr, + &dev_attr_load_max.attr, + &dev_attr_down_freq_margin.attr, + &dev_attr_up_freq_margin.attr, + &dev_attr_k.attr, + NULL, }; -#define HZ_PER_KHZ 1000 +static struct attribute_group dev_attr_group = { + .name = DEVFREQ_GOV_NVHOST_PODGOV, + .attrs = dev_entries, +}; -static void get_freq_range(struct devfreq *devfreq, - unsigned long *min_freq, - unsigned long *max_freq) +static int devfreq_get_freq_index(struct devfreq *df, unsigned long freq) { #if defined(NV_DEVFREQ_HAS_FREQ_TABLE) - unsigned long *freq_table = devfreq->freq_table; - unsigned int max_state = devfreq->max_state; + unsigned long *freq_table = df->freq_table; + unsigned int max_state = df->max_state; #else - unsigned long *freq_table = devfreq->profile->freq_table; - unsigned int max_state = devfreq->profile->max_state; + unsigned long *freq_table = df->profile->freq_table; + unsigned int max_state = df->profile->max_state; #endif + int i; - lockdep_assert_held(&devfreq->lock); - - if (freq_table[0] < freq_table[max_state - 1]) { - *min_freq = freq_table[0]; - *max_freq = freq_table[max_state - 1]; - } else { - *min_freq = freq_table[max_state - 1]; - *max_freq = freq_table[0]; - } - - /* Apply constraints from OPP interface */ - *min_freq = max(*min_freq, devfreq->scaling_min_freq); - *max_freq = min(*max_freq, devfreq->scaling_max_freq); - - if (*min_freq > *max_freq) - *min_freq = *max_freq; -} - -static void get_min_freq_limit(struct devfreq *df, unsigned long *min_freq_hz) -{ - 
unsigned long max_freq_hz; - - get_freq_range(df, min_freq_hz, &max_freq_hz); -} - -static void get_max_freq_limit(struct devfreq *df, unsigned long *max_freq_hz) -{ - unsigned long min_freq_hz; - - get_freq_range(df, &min_freq_hz, max_freq_hz); -} - -/******************************************************************************* - * scaling_limit(df, freq) - * - * Limit the given frequency - ******************************************************************************/ - -static void scaling_limit(struct devfreq *df, unsigned long *freq) -{ - unsigned long min_freq_hz = 0; - unsigned long max_freq_hz = 0; - - get_min_freq_limit(df, &min_freq_hz); - get_max_freq_limit(df, &max_freq_hz); - - if (*freq < min_freq_hz) - *freq = min_freq_hz; - else if (*freq > max_freq_hz) - *freq = max_freq_hz; -} - -/******************************************************************************* - * podgov_enable(dev, enable) - * - * This function enables (enable=1) or disables (enable=0) the automatic scaling - * of the device. If the device is disabled, the device's clock is set to its - * maximum. 
- ******************************************************************************/ - -static void podgov_enable(struct devfreq *df, int enable) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov = df->data; - bool polling; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&podgov->lock); - mutex_lock(&df->lock); - - trace_podgov_enabled(df->dev.parent, enable); - - /* store the enable information */ - podgov->enable = enable; - - /* skip local adjustment if we are enabling or the device is - * suspended */ - if (!enable && pm_runtime_active(dev)) { - /* full speed */ - get_max_freq_limit(df, &podgov->adjustment_frequency); - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - polling = podgov->enable && !podgov->p_user; - - /* Need to unlock to call devfreq_monitor_suspend/resume() - * still holding podgov->lock to guarantee atomicity - */ - mutex_unlock(&df->lock); - - if (polling) - devfreq_monitor_resume(df); - else - devfreq_monitor_suspend(df); - - mutex_unlock(&podgov->lock); - pm_runtime_put(dev); -} - -/***************************************************************************** - * podgov_set_user_ctl(dev, user) - * - * This function enables or disables user control of the gpu. If user control - * is enabled, setting the freq_request controls the gpu frequency, and other - * gpu scaling mechanisms are disabled. 
- ******************************************************************************/ - -static void podgov_set_user_ctl(struct devfreq *df, int user) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov = df->data; - int old_user; - bool polling; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&podgov->lock); - mutex_lock(&df->lock); - - trace_podgov_set_user_ctl(df->dev.parent, user); - - /* store the new user value */ - old_user = podgov->p_user; - podgov->p_user = user; - - /* skip scaling, if scaling (or the whole device) is turned off - * - or the scaling already was in user mode */ - if (pm_runtime_active(dev) && podgov->enable && user && !old_user) { - /* write request */ - podgov->adjustment_frequency = podgov->p_freq_request; - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - polling = podgov->enable && !podgov->p_user; - - /* Need to unlock to call devfreq_monitor_suspend/resume() - * still holding podgov->lock to guarantee atomicity - */ - mutex_unlock(&df->lock); - - if (polling) - devfreq_monitor_resume(df); - else - devfreq_monitor_suspend(df); - - mutex_unlock(&podgov->lock); - pm_runtime_put(dev); -} - -/***************************************************************************** - * podgov_set_freq_request(dev, user) - * - * Set the current freq request. If scaling is enabled, and podgov user space - * control is enabled, this will set the gpu frequency. 
- ******************************************************************************/ - -static void podgov_set_freq_request(struct devfreq *df, int freq_request) -{ - struct device *dev = df->dev.parent; - struct podgov_info_rec *podgov; - - /* make sure the device is alive before doing any scaling */ - pm_runtime_get_noresume(dev); - - mutex_lock(&df->lock); - - podgov = df->data; - - trace_podgov_set_freq_request(df->dev.parent, freq_request); - - podgov->p_freq_request = freq_request; - - /* update the request only if podgov is enabled, device is turned on - * and the scaling is in user mode */ - if (podgov->enable && podgov->p_user && - pm_runtime_active(dev)) { - podgov->adjustment_frequency = freq_request; - podgov->adjustment_type = ADJUSTMENT_LOCAL; - update_devfreq(df); - } - - mutex_unlock(&df->lock); - pm_runtime_put(dev); -} - - -/******************************************************************************* - * freq = scaling_state_check(df, time) - * - * This handler is called to adjust the frequency of the device. The function - * returns the desired frequency for the clock. If there is no need to tune the - * clock immediately, 0 is returned. 
- ******************************************************************************/ - -static unsigned long scaling_state_check(struct devfreq *df, ktime_t time) -{ - struct podgov_info_rec *pg = df->data; - struct devfreq_dev_status *ds = &df->last_status; - unsigned long dt, busyness, rt_load = pg->rt_load; - long max_boost, damp, freq, boost, res; - unsigned long max_freq_hz = 0; - - dt = (unsigned long) ktime_us_delta(time, pg->last_scale); - if (dt < pg->p_block_window || df->previous_freq == 0) - return 0; - - /* convert to mhz to avoid overflow */ - freq = df->previous_freq / 1000000; - get_max_freq_limit(df, &max_freq_hz); - max_boost = ((max_freq_hz / 3) / 1000000); - - /* calculate and trace load */ - busyness = 1000ULL * pg->cycles_avg / ds->current_frequency; - - /* consider recent high load if required */ - if (pg->p_history_buf_size && pg->history_count) - busyness = 1000ULL * pg->recent_high / ds->current_frequency; - - trace_podgov_load(df->dev.parent, rt_load); - trace_podgov_busy(df->dev.parent, busyness); - - damp = pg->p_damp; - - if (rt_load > pg->p_load_max) { - /* if too busy, scale up max/3, do not damp */ - boost = max_boost; - damp = 10; - } else { - /* boost = bias * freq * (busyness - target)/target */ - boost = busyness - pg->p_load_target; - boost *= (pg->p_bias * freq); - boost /= (100 * pg->p_load_target); - - /* clamp to max boost */ - boost = (boost < max_boost) ? 
boost : max_boost; - } - - /* calculate new request */ - res = freq + boost; - - /* Maintain average request */ - pg->freq_avg = (pg->freq_avg * pg->p_smooth) + res; - pg->freq_avg /= (pg->p_smooth+1); - - /* Applying damping to frequencies */ - res = ((damp * res) + ((10 - damp)*pg->freq_avg)) / 10; - - /* Convert to hz, limit, and apply */ - res = res * 1000000; - scaling_limit(df, &res); - trace_podgov_scaling_state_check(df->dev.parent, - df->previous_freq, res); - return res; -} - -/******************************************************************************* - * freqlist_up(podgov, target, steps) - * - * This function determines the frequency that is "steps" frequency steps - * higher compared to the target frequency. - ******************************************************************************/ - -static int freqlist_up(struct podgov_info_rec *podgov, unsigned long target, - int steps) -{ - int i, pos; - - for (i = 0; i < podgov->freq_count; i++) - if (podgov->freqlist[i] >= target) + for (i = 0; i < max_state; i++) { + if (freq_table[i] >= freq) break; - - pos = min(podgov->freq_count - 1, i + steps); - return podgov->freqlist[pos]; -} - -/******************************************************************************* - * debugfs interface for controlling 3d clock scaling on the fly - ******************************************************************************/ - -#ifdef CONFIG_DEBUG_FS - -static void nvhost_scale_emc_debug_init(struct devfreq *df) -{ - struct podgov_info_rec *podgov = df->data; - char dirname[128]; - int err; - - err = snprintf(dirname, sizeof(dirname), "%s_scaling", - to_platform_device(df->dev.parent)->name); - WARN_ON(err < 0); - - if (!podgov) - return; - - podgov->debugdir = debugfs_create_dir(dirname, NULL); - if (!podgov->debugdir) { - pr_err("podgov: can\'t create debugfs directory\n"); - return; } -#define CREATE_PODGOV_FILE(fname) \ - do {\ - debugfs_create_u32(#fname, S_IRUGO | S_IWUSR, \ - podgov->debugdir, 
&podgov->p_##fname); \ - } while (0) - - CREATE_PODGOV_FILE(block_window); - CREATE_PODGOV_FILE(load_max); - CREATE_PODGOV_FILE(load_target); - CREATE_PODGOV_FILE(bias); - CREATE_PODGOV_FILE(damp); - CREATE_PODGOV_FILE(smooth); -#undef CREATE_PODGOV_FILE + return i; } -static void nvhost_scale_emc_debug_deinit(struct devfreq *df) +static int devfreq_notifier_call(struct notifier_block *nb, unsigned long event, void *ptr) { - struct podgov_info_rec *podgov = df->data; + struct podgov_data *podgov = container_of(nb, struct podgov_data, nb); + struct devfreq_freqs *freqs = ptr; - debugfs_remove_recursive(podgov->debugdir); + switch (event) { + case DEVFREQ_POSTCHANGE: + podgov->freq_index = devfreq_get_freq_index(podgov->df, freqs->new); + break; + default: + break; + }; + + return NOTIFY_DONE; } -#else -static void nvhost_scale_emc_debug_init(struct devfreq *df) +static int nvhost_pod_target_freq(struct devfreq *df, unsigned long *freq) { - (void)df; -} - -static void nvhost_scale_emc_debug_deinit(struct devfreq *df) -{ - (void)df; -} -#endif - -/******************************************************************************* - * sysfs interface for enabling/disabling 3d scaling - ******************************************************************************/ - -static ssize_t enable_3d_scaling_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = container_of(attr, - struct podgov_info_rec, - enable_3d_scaling_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->enable); - WARN_ON(res < 0); - - return res; -} - -static ssize_t enable_3d_scaling_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = container_of(attr, - struct podgov_info_rec, - enable_3d_scaling_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_enable(podgov->power_manager, val); - - return count; -} - 
-/******************************************************************************* - * sysfs interface for user space control - * user = [0,1] disables / enabled user space control - * freq_request is the sysfs node user space writes frequency requests to - ******************************************************************************/ - -static ssize_t user_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, user_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->p_user); - WARN_ON(res < 0); - - return res; -} - -static ssize_t user_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, user_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_set_user_ctl(podgov->power_manager, val); - - return count; -} - -static ssize_t freq_request_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, freq_request_attr); - ssize_t res; - - res = snprintf(buf, PAGE_SIZE, "%d\n", podgov->p_freq_request); - WARN_ON(res < 0); - - return res; -} - -static ssize_t freq_request_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct podgov_info_rec *podgov = - container_of(attr, struct podgov_info_rec, freq_request_attr); - unsigned long val = 0; - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - podgov_set_freq_request(podgov->power_manager, val); - - return count; -} - -/******************************************************************************* - * nvhost_pod_estimate_freq(df, freq) - * - * This function is called for re-estimating the frequency. 
The function is - * called in three conditions: - * - * (1) Internal request to change the frequency. In this case a new clock - * target is immediately set for the device. - * (2) Call from the client (something has happened and re-estimation - * is required). - * (3) Some other reason (i.e. periodic call) - * - ******************************************************************************/ - -static int nvhost_pod_estimate_freq(struct devfreq *df, - unsigned long *freq) -{ - struct podgov_info_rec *pg = df->data; - struct devfreq_dev_status *ds; - int err, i; - int buf_size = pg->p_history_buf_size; - int buf_next = pg->history_next; - int buf_count = pg->history_count; - unsigned long *cycles_buffer = pg->cycles_history_buf; - ktime_t now; - unsigned long long norm_load; - - /* If the device is suspended, clear the history and set frequency to - * min freq. - */ - if (pg->suspended) { - *freq = DEVFREQ_MIN_FREQ; - pg->last_scale = ktime_get(); - i = 0; - for (; i < MAX_HISTORY_BUF_SIZE; i++) - pg->cycles_history_buf[i] = 0; - pg->history_count = 0; - pg->history_next = 0; - pg->recent_high = 0; - pg->freq_avg = 0; - return 0; - } - - /* Ensure maximal clock when scaling is disabled */ - if (!pg->enable) { - *freq = DEVFREQ_MAX_FREQ; - if (*freq == df->previous_freq) - return GET_TARGET_FREQ_DONTSCALE; - else - return 0; - } - - if (pg->p_user) { - *freq = pg->p_freq_request; - return 0; - } + struct podgov_data *podgov = df->governor_data; + struct devfreq_dev_status *status; + unsigned long load, weight; + unsigned int down_threshold; + int index, err; err = devfreq_update_stats(df); if (err) return err; - ds = &df->last_status; + status = &df->last_status; - if (ds->total_time == 0) { - *freq = ds->current_frequency; + /* Update two types of loads */ + load = status->busy_time * 1000 / status->total_time; + weight = 1 << podgov->k; + podgov->avg_load = (podgov->avg_load * (weight - 1) + load) / weight; + + /* Scale up to maximum frequency to respond transient 
peak workload */ + if (load >= podgov->load_max) { + *freq = ULONG_MAX; return 0; } - now = ktime_get(); + /* Scale up current frequency by number of steps */ + if (podgov->avg_load > podgov->load_target) { + index = podgov->freq_index + podgov->up_freq_margin; - /* Local adjustments (i.e. requests from kernel threads) are - * handled here */ +#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) + if (index < 0) + index = df->max_state - 1; + else + index = min(index, (int)df->max_state - 1); - if (pg->adjustment_type == ADJUSTMENT_LOCAL) { + *freq = df->freq_table[index]; +#else + if (index < 0) + index = df->profile->max_state - 1; + else + index = min(index, (int)df->profile->max_state - 1); - pg->adjustment_type = ADJUSTMENT_DEVICE_REQ; - - /* Do not do unnecessary scaling */ - scaling_limit(df, &pg->adjustment_frequency); - - trace_podgov_estimate_freq(df->dev.parent, - df->previous_freq, - pg->adjustment_frequency); - - *freq = pg->adjustment_frequency; + *freq = df->profile->freq_table[index]; +#endif return 0; } - /* Sustain local variables */ - norm_load = (u64)ds->current_frequency * ds->busy_time / ds->total_time; - pg->cycles_norm = norm_load; - pg->cycles_avg = ((u64)pg->cycles_avg * pg->p_smooth + norm_load) / - (pg->p_smooth + 1); - pg->rt_load = 1000ULL * ds->busy_time / ds->total_time; - - /* Update history of normalized cycle counts and recent highest count */ - if (buf_size) { - if (buf_count == buf_size) { - pg->recent_high = 0; - i = (buf_next + 1) % buf_size; - for (; i != buf_next; i = (i + 1) % buf_size) { - if (cycles_buffer[i] > pg->recent_high) - pg->recent_high = cycles_buffer[i]; - } - } - cycles_buffer[buf_next] = norm_load; - pg->history_next = (buf_next + 1) % buf_size; - if (buf_count < buf_size) - pg->history_count += 1; - if (norm_load > pg->recent_high) - pg->recent_high = norm_load; + /* Scale down current frequency by number of steps */ + if (podgov->load_margin < podgov->load_target) { + down_threshold = podgov->load_target - 
podgov->load_margin; + } else { + down_threshold = 0; } - *freq = scaling_state_check(df, now); - - if (!(*freq)) { - *freq = ds->current_frequency; + if (podgov->avg_load < down_threshold) { + index = podgov->freq_index - podgov->down_freq_margin; + index = max(index, 0); +#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) + *freq = df->freq_table[index]; +#else + *freq = df->profile->freq_table[index]; +#endif return 0; } - if ((*freq = freqlist_up(pg, *freq, 0)) == ds->current_frequency) - return 0; - - pg->last_scale = now; - - trace_podgov_estimate_freq(df->dev.parent, df->previous_freq, *freq); - - + /* Stay with the same frequency */ + *freq = status->current_frequency; return 0; } -/******************************************************************************* - * nvhost_pod_init(struct devfreq *df) - * - * Governor initialisation. - ******************************************************************************/ - static int nvhost_pod_init(struct devfreq *df) { - struct podgov_info_rec *podgov; - struct platform_device *d = to_platform_device(df->dev.parent); - ktime_t now = ktime_get(); + struct podgov_data *podgov; + struct devfreq_dev_status *status; + int err; - struct kobj_attribute *attr = NULL; + err = devfreq_update_stats(df); + if (err) + return err; - podgov = kzalloc(sizeof(struct podgov_info_rec), GFP_KERNEL); + status = &df->last_status; + + podgov = kzalloc(sizeof(*podgov), GFP_KERNEL); if (!podgov) - goto err_alloc_podgov; + return -ENOMEM; - podgov->cycles_history_buf = - kzalloc(sizeof(unsigned long) * MAX_HISTORY_BUF_SIZE, - GFP_KERNEL); - if (!podgov->cycles_history_buf) - goto err_alloc_history_buffer; + df->governor_data = (void *)podgov; - podgov->p_history_buf_size = - MAX_HISTORY_BUF_SIZE < 100 ? 
MAX_HISTORY_BUF_SIZE : 100; - podgov->history_count = 0; - podgov->history_next = 0; - podgov->recent_high = 0; + /* Set default scaling parameters */ + podgov->load_target = 700; + podgov->load_margin = 100; + podgov->load_max = 900; + podgov->down_freq_margin = 1; + podgov->up_freq_margin = 4; + podgov->k = 3; - df->data = (void *)podgov; + /* Reset private data */ + podgov->avg_load = 0; + podgov->freq_index = devfreq_get_freq_index(df, status->current_frequency); + podgov->df = df; - /* Set scaling parameter defaults */ - podgov->enable = 1; - podgov->suspended = 0; + podgov->nb.notifier_call = devfreq_notifier_call; + err = devfreq_register_notifier(df, &podgov->nb, DEVFREQ_TRANSITION_NOTIFIER); + if (err) + goto free_data; - podgov->p_load_max = 900; - podgov->p_load_target = 700; - podgov->p_bias = 80; - podgov->p_smooth = 10; - podgov->p_damp = 7; - podgov->p_block_window = 50000; + /* Expose tunable params under devfreq sysfs */ + err = sysfs_create_group(&df->dev.kobj, &dev_attr_group); + if (err) + goto unregister_notifier; - podgov->adjustment_type = ADJUSTMENT_DEVICE_REQ; - podgov->p_user = 0; - - /* Reset clock counters */ - podgov->last_scale = now; - - podgov->power_manager = df; - - mutex_init(&podgov->lock); - - attr = &podgov->enable_3d_scaling_attr; - attr->attr.name = "enable_3d_scaling"; - attr->attr.mode = S_IWUSR | S_IRUGO; - attr->show = enable_3d_scaling_show; - attr->store = enable_3d_scaling_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_enable_sysfs_entry; - - attr = &podgov->freq_request_attr; - attr->attr.name = "freq_request"; - attr->attr.mode = S_IWUSR | S_IRUGO; - attr->show = freq_request_show; - attr->store = freq_request_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_request_sysfs_entry; - - attr = &podgov->user_attr; - attr->attr.name = "user"; - attr->attr.mode = S_IWUSR | S_IRUGO; - 
attr->show = user_show; - attr->store = user_store; - sysfs_attr_init(&attr->attr); - if (sysfs_create_file(&df->dev.parent->kobj, &attr->attr)) - goto err_create_user_sysfs_entry; - -#if defined(NV_DEVFREQ_HAS_FREQ_TABLE) - podgov->freq_count = df->max_state; - podgov->freqlist = df->freq_table; -#else - podgov->freq_count = df->profile->max_state; - podgov->freqlist = df->profile->freq_table; -#endif - if (!podgov->freq_count || !podgov->freqlist) - goto err_get_freqs; - - /* store the limits */ - podgov->p_freq_request = podgov->freqlist[podgov->freq_count - 1]; - - podgov->freq_avg = 0; - - nvhost_scale_emc_debug_init(df); - - devfreq_monitor_start(df); + if (!pm_runtime_suspended(df->dev.parent)) { + devfreq_monitor_start(df); + } return 0; -err_get_freqs: - sysfs_remove_file(&df->dev.parent->kobj, &podgov->user_attr.attr); -err_create_user_sysfs_entry: - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->freq_request_attr.attr); -err_create_request_sysfs_entry: - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->enable_3d_scaling_attr.attr); -err_create_enable_sysfs_entry: - dev_err(&d->dev, "failed to create sysfs attributes"); - kfree(podgov->cycles_history_buf); -err_alloc_history_buffer: - kfree(podgov); -err_alloc_podgov: - return -ENOMEM; +unregister_notifier: + devfreq_unregister_notifier(df, &podgov->nb, DEVFREQ_TRANSITION_NOTIFIER); +free_data: + kfree(df->governor_data); + df->governor_data = NULL; + return err; } -/******************************************************************************* - * nvhost_pod_exit(struct devfreq *df) - * - * Clean up governor data structures - ******************************************************************************/ - static void nvhost_pod_exit(struct devfreq *df) { - struct podgov_info_rec *podgov = df->data; + struct podgov_data *podgov = df->governor_data; devfreq_monitor_stop(df); + sysfs_remove_group(&df->dev.kobj, &dev_attr_group); + devfreq_unregister_notifier(df, &podgov->nb, 
DEVFREQ_TRANSITION_NOTIFIER); - sysfs_remove_file(&df->dev.parent->kobj, &podgov->user_attr.attr); - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->freq_request_attr.attr); - sysfs_remove_file(&df->dev.parent->kobj, - &podgov->enable_3d_scaling_attr.attr); - - nvhost_scale_emc_debug_deinit(df); - kfree(podgov->cycles_history_buf); - kfree(podgov); + kfree(df->governor_data); + df->governor_data = NULL; } -/****************************************************************************** - * nvhost_pod_suspend(struct devfreq *df) - * - * Suspends the governor. - *****************************************************************************/ - static void nvhost_pod_suspend(struct devfreq *df) { - // Record suspension in our own data structure because we'll have to - // erase and restore devfreq's for this to work. - struct podgov_info_rec *pg = df->data; + struct podgov_data *podgov = df->governor_data; - pg->suspended = 1; - - // Update frequency for the final time before going into suspension. - mutex_lock(&df->lock); - update_devfreq(df); - mutex_unlock(&df->lock); + podgov->avg_load = 0; devfreq_monitor_suspend(df); } -/****************************************************************************** - * nvhost_pod_resume(struct devfreq *df) - * - * Resumes the governor. 
- *****************************************************************************/ - static void nvhost_pod_resume(struct devfreq *df) { - // Update our data structure's suspension field - struct podgov_info_rec *pg = df->data; - - pg->suspended = 0; - - // Resume devfreq_monitor_resume(df); } @@ -904,10 +342,9 @@ static int nvhost_pod_event_handler(struct devfreq *df, } static struct devfreq_governor nvhost_podgov = { - .name = "nvhost_podgov", - .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL - | DEVFREQ_GOV_ATTR_TIMER, - .get_target_freq = nvhost_pod_estimate_freq, + .name = DEVFREQ_GOV_NVHOST_PODGOV, + .attrs = DEVFREQ_GOV_ATTR_POLLING_INTERVAL | DEVFREQ_GOV_ATTR_TIMER, + .get_target_freq = nvhost_pod_target_freq, .event_handler = nvhost_pod_event_handler, }; @@ -916,14 +353,12 @@ static int __init podgov_init(void) { return devfreq_add_governor(&nvhost_podgov); } +subsys_initcall(podgov_init); static void __exit podgov_exit(void) { devfreq_remove_governor(&nvhost_podgov); return; } - -/* governor must be registered before initialising client devices */ -rootfs_initcall(podgov_init); module_exit(podgov_exit); MODULE_LICENSE("GPL"); diff --git a/include/linux/devfreq/nvhost_podgov.h b/include/linux/devfreq/nvhost_podgov.h new file mode 100644 index 00000000..fbc17df3 --- /dev/null +++ b/include/linux/devfreq/nvhost_podgov.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + */ + +#ifndef DEVFREQ_NVHOST_PODGOV_H +#define DEVFREQ_NVHOST_PODGOV_H + +#define DEVFREQ_GOV_NVHOST_PODGOV "nvhost_podgov" + +#endif /* DEVFREQ_NVHOST_PODGOV_H */ diff --git a/include/trace/events/nvhost_podgov.h b/include/trace/events/nvhost_podgov.h deleted file mode 100644 index 8c63d29b..00000000 --- a/include/trace/events/nvhost_podgov.h +++ /dev/null @@ -1,250 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2013-2023, NVIDIA Corporation. 
All rights reserved. - * - * Nvhost event logging to ftrace. - */ - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM nvhost_podgov - -#if !defined(_TRACE_NVHOST_PODGOV_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_NVHOST_PODGOV_H - -#include -#include -#include - -DECLARE_EVENT_CLASS(podgov_update_freq, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - - TP_ARGS(dev, old_freq, new_freq), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, old_freq) - __field(unsigned long, new_freq) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->old_freq = old_freq; - __entry->new_freq = new_freq; - ), - - TP_printk("name=%s, old_freq=%lu, new_freq=%lu", - dev_name(__entry->dev), __entry->old_freq, __entry->new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_do_scale, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_scaling_state_check, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_estimate_freq, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -DEFINE_EVENT(podgov_update_freq, podgov_clocks_handler, - TP_PROTO(struct device *dev, unsigned long old_freq, unsigned long new_freq), - TP_ARGS(dev, old_freq, new_freq) -); - -TRACE_EVENT(podgov_enabled, - TP_PROTO(struct device *dev, int enable), - - TP_ARGS(dev, enable), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, enable) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->enable = enable; - ), - - TP_printk("name=%s, scaling_enabled=%d", dev_name(__entry->dev), __entry->enable) -); - -TRACE_EVENT(podgov_set_user_ctl, - TP_PROTO(struct device *dev, int user_ctl), - - TP_ARGS(dev, user_ctl), - - TP_STRUCT__entry( - __field(struct 
device *, dev) - __field(int, user_ctl) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->user_ctl = user_ctl; - ), - - TP_printk("name=%s, userspace control=%d", dev_name(__entry->dev), __entry->user_ctl) -); - -TRACE_EVENT(podgov_set_freq_request, - TP_PROTO(struct device *dev, int freq_request), - - TP_ARGS(dev, freq_request), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, freq_request) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->freq_request = freq_request; - ), - - TP_printk("name=%s, freq_request=%d", dev_name(__entry->dev), __entry->freq_request) -); - -TRACE_EVENT(podgov_busy, - TP_PROTO(struct device *dev, unsigned long busyness), - - TP_ARGS(dev, busyness), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, busyness) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->busyness = busyness; - ), - - TP_printk("name=%s, busyness=%lu", dev_name(__entry->dev), __entry->busyness) -); - -TRACE_EVENT(podgov_hint, - TP_PROTO(struct device *dev, long idle_estimate, int hint), - - TP_ARGS(dev, idle_estimate, hint), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(long, idle_estimate) - __field(int, hint) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->idle_estimate = idle_estimate; - __entry->hint = hint; - ), - - TP_printk("podgov (%s): idle %ld, hint %d", dev_name(__entry->dev), - __entry->idle_estimate, __entry->hint) -); - -TRACE_EVENT(podgov_idle, - TP_PROTO(struct device *dev, unsigned long idleness), - - TP_ARGS(dev, idleness), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned long, idleness) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->idleness = idleness; - ), - - TP_printk("name=%s, idleness=%lu", dev_name(__entry->dev), __entry->idleness) -); - -TRACE_EVENT(podgov_load, - TP_PROTO(struct device *dev, unsigned long load), - - TP_ARGS(dev, load), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(unsigned 
long, load) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->load = load; - ), - - TP_printk("name=%s, load=%lu", dev_name(__entry->dev), __entry->load) -); - -TRACE_EVENT(podgov_print_target, - TP_PROTO(struct device *dev, long busy, int avg_busy, long curr, - long target, int hint, int avg_hint), - - TP_ARGS(dev, busy, avg_busy, curr, target, hint, avg_hint), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(long, busy) - __field(int, avg_busy) - __field(long, curr) - __field(long, target) - __field(int, hint) - __field(int, avg_hint) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->busy = busy; - __entry->avg_busy = avg_busy; - __entry->curr = curr; - __entry->target = target; - __entry->hint = hint; - __entry->avg_hint = avg_hint; - ), - - TP_printk("podgov (%s): busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n", - dev_name(__entry->dev), __entry->busy, __entry->avg_busy, __entry->curr, - __entry->target, __entry->hint, __entry->avg_hint) -); - -TRACE_EVENT(podgov_stats, - TP_PROTO(struct device *dev, int fast_up_count, int slow_down_count, - unsigned int idle_min, unsigned int idle_max), - - TP_ARGS(dev, fast_up_count, slow_down_count, idle_min, idle_max), - - TP_STRUCT__entry( - __field(struct device *, dev) - __field(int, fast_up_count) - __field(int, slow_down_count) - __field(unsigned int, idle_min) - __field(unsigned int, idle_max) - ), - - TP_fast_assign( - __entry->dev = dev; - __entry->fast_up_count = fast_up_count; - __entry->slow_down_count = slow_down_count; - __entry->idle_min = idle_min; - __entry->idle_max = idle_max; - ), - - TP_printk("podgov stats (%s): + %d - %d min %u max %u\n", - dev_name(__entry->dev), __entry->fast_up_count, - __entry->slow_down_count, __entry->idle_min, - __entry->idle_max) -); - -#endif /* _TRACE_NVHOST_PODGOV_H */ - -/* This part must be outside protection */ -#include