nvidia-oot: cpuidle: Support coordinated wake

Adding debugfs support for the coordinated wake state.
This will allow for the end-user to select a number of destination CPUs,
and then force them into idle together. A chosen "src" cpu will then
trigger a simultaneous wake request on all of the cores to create the
worst-case idle-exit scenario.
We also have an alternate state to support only coordinated sleep.
This can be useful for cases where certain cores, when coordinated together,
have deeper power modes available to them.

TPS-671

Change-Id: I901f86000d36a86f9549f713ba698b58896d55b7
Signed-off-by: Ishan Shah <ishah@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3242284
Reviewed-by: Nathan Hartman <nhartman@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Sanjay Chandrashekara <sanjayc@nvidia.com>
This commit is contained in:
Ishan Shah
2024-11-04 19:50:25 +00:00
committed by Jon Hunter
parent 7512e6b575
commit 3969211745
3 changed files with 323 additions and 8 deletions

View File

@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. # Copyright (c) 2022-2025, NVIDIA CORPORATION. All rights reserved.
obj-m += cpuidle-tegra-auto.o obj-m += cpuidle-tegra-auto.o
obj-m += cpuidle-debugfs.o obj-m += cpuidle-debugfs.o
obj-m += cpuidle-cg-disable-t264.o obj-m += cpuidle-cg-disable-t264.o
CFLAGS_cpuidle-debugfs.o := -I$(src)

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only // SPDX-License-Identifier: GPL-2.0-only
// SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
/* /*
* Module to force cpuidle states through debugfs files. * Module to force cpuidle states through debugfs files.
*/ */
@@ -12,16 +12,61 @@
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpuidle.h> #include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/smp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuidle_debugfs_ftrace.h>
#define US_TO_NS(x) (1000 * x) #define US_TO_NS(x) (1000 * x)
static struct cpuidle_driver *drv; static struct cpuidle_driver *drv;
/**
* We have two cpumasks defined, groups a and b.
* They are meant generally to support forced-idle entry for two different
* time periods. E.g. you may want CPUs in a to reside for 10ms, but b to reside
 * for 100ms. In this way, you can test the coordination of various idle-states,
* as the desired residency & latency can be passed to the OS (or implicitly to
* the platform) to make decisions about deeper idle states.
* This can also be used to test waking up cores at varying points.
*
* In the latency-test scenario, where you are using ipi-wake, only CPUs in mask
* a are going to be woken up via IPI. This can allow for those CPUs in b to
* stay asleep for longer periods of time, which may reveal the effects of e.g.
* keeping one core in a clusterpair/one thread in a thread-pair asleep and have
* the other woken up.
*/
/* Core-number for ipi-sourcing */
static u64 ipi_src_cpu;
/* CPU Mask struct for the coordinated-entry functions */
static struct cpumask sleep_dest_a;
static struct cpumask sleep_dest_b;
/* Desired cc7 residency for coordinated-entry functions */
static u64 sleep_residency_ns_a;
static u64 sleep_residency_ns_b;
/* Custom struct to encapsulate idle-state details & work struct */
struct coordinated_sleep_struct {
bool do_coordinated_wakeup;
uint64_t duration_ns;
uint64_t exit_latency_ns;
struct work_struct work;
};
/* Struct for coordinating idle-entry & exit */
struct coordinated_sleep_struct coordination_params;
/* Per-CPU struct for idle-state details & work struct */
static DEFINE_PER_CPU(struct coordinated_sleep_struct, enter_idle_work);
static bool is_timer_irq(struct irq_desc *desc) static bool is_timer_irq(struct irq_desc *desc)
{ {
return desc && desc->action && (desc->action->flags & IRQF_TIMER); return desc && desc->action && (desc->action->flags & IRQF_TIMER);
} }
/* Function to disable all non-Timer IRQs. We need Timers for CC7-Wake. */
static void suspend_all_device_irqs(void) static void suspend_all_device_irqs(void)
{ {
struct irq_data *data; struct irq_data *data;
@@ -72,35 +117,246 @@ static void resume_all_device_irqs(void)
} }
} }
/*
 * forced_idle_entry() - force the calling CPU into idle via play_idle_precise().
 * @duration_ns:     requested idle residency, in nanoseconds
 * @exit_latency_ns: maximum tolerable exit latency, in nanoseconds
 *
 * Masks all non-timer device IRQs before entering idle (timer IRQs are kept
 * so the CPU can still wake from the forced state) and restores them on exit.
 * Must run on the CPU that is meant to enter idle; it idles the calling CPU
 * only.
 */
static void forced_idle_entry(u64 duration_ns, u64 exit_latency_ns)
{
	suspend_all_device_irqs();
	/* duration_ns, latency_ns */
	play_idle_precise(duration_ns, exit_latency_ns);
	resume_all_device_irqs();
}
/*
 * Workqueue handler that forces the CPU running it into idle.
 *
 * Recovers this CPU's coordinated_sleep_struct from the embedded work_struct
 * and enters forced idle using the per-CPU duration/latency previously
 * stashed by update_this_cpu_sleep_target().
 */
static void forced_idle_work_func(struct work_struct *work)
{
	struct coordinated_sleep_struct *this_cpu_sleep = container_of(work,
			struct coordinated_sleep_struct, work);
	forced_idle_entry(this_cpu_sleep->duration_ns, this_cpu_sleep->exit_latency_ns);
}
/*
 * SMP cross-call handler used as the wake-IPI payload: runs on each woken
 * CPU purely to emit a trace marker — the IPI itself is what pulls the CPU
 * out of forced idle. @info is unused.
 *
 * NOTE(review): despite the *_work_func name this is an smp_call handler,
 * not a workqueue callback.
 */
static void forced_wakeup_work_func(void *info)
{
	trace_cpuidle_debugfs_print("Scheduled task after CPU_SUSPEND\n");
}
/*
 * SMP cross-call handler: from IPI context, queue this CPU's forced-idle
 * work item (forced_idle_work_func) on the high-priority system workqueue,
 * pinned to the current CPU. The actual idle entry happens later in process
 * context when the work runs. @info is unused.
 */
static void enter_work_func(void *info)
{
	struct coordinated_sleep_struct *this_cpu_sleep = this_cpu_ptr(&enter_idle_work);
	queue_work_on(smp_processor_id(), system_highpri_wq, &(this_cpu_sleep->work));
}
/* Function that runs on each CPU as an SMP interrupt call */
/* This will update the per_cpu sleep_details */
static void update_this_cpu_sleep_target(void *info)
{
struct coordinated_sleep_struct *this_cpu_sleep = this_cpu_ptr(&enter_idle_work);
struct coordinated_sleep_struct *sleep_details = (struct coordinated_sleep_struct *) info;
/* Params are passed to forced_idle_entry func */
this_cpu_sleep->duration_ns = sleep_details->duration_ns;
this_cpu_sleep->exit_latency_ns = sleep_details->exit_latency_ns;
}
/*
 * Coordinator work item, intended to run on ipi_src_cpu.
 *
 * Distributes per-group residency targets to the CPUs in masks a and b,
 * triggers their simultaneous forced-idle entry, and — when
 * do_coordinated_wakeup is set — sends a wake IPI to mask a roughly halfway
 * through mask a's residency period, to create the worst-case idle-exit
 * scenario described in the file header.
 *
 * NOTE(review): smp_call_function_many() normally requires preemption to be
 * disabled by the caller; this runs in plain workqueue context — confirm
 * this is intentional/safe on the target kernel.
 */
static void coordinated_forced_idle_work_func(struct work_struct *work)
{
	struct coordinated_sleep_struct *sleep_details =
		container_of(work, struct coordinated_sleep_struct, work);
	struct cpumask combined_mask;
	cpumask_or(&combined_mask, &sleep_dest_a, &sleep_dest_b);
	/*
	 * Copy a/b parameters into a & b respectively. The same sleep_details
	 * buffer is reused, so each call waits (wait=true) before duration_ns
	 * is overwritten for the next group.
	 */
	sleep_details->duration_ns = sleep_residency_ns_a;
	smp_call_function_many(&sleep_dest_a, update_this_cpu_sleep_target,
			sleep_details, true);
	sleep_details->duration_ns = sleep_residency_ns_b;
	smp_call_function_many(&sleep_dest_b, update_this_cpu_sleep_target,
			sleep_details, true);
	/*
	 * Call into sleep-entry. wait=true only covers the IPI handler that
	 * queues the work; it does not wait for the CPUs to actually idle.
	 */
	smp_call_function_many(&combined_mask, enter_work_func, NULL, true);
	if (sleep_details->do_coordinated_wakeup) {
		/* Assume that the tasks will be scheduled */
		/* Delay for roughly 1/2 of the target residency period */
		/* We will use ndelay to avoid yielding the CPU */
		/* NOTE(review): busy-waits for up to sleep_residency_ns_a/2 */
		ndelay(sleep_residency_ns_a / 2);
		trace_cpuidle_debugfs_print("Triggering wake IPI\n");
		smp_call_function_many(&sleep_dest_a, forced_wakeup_work_func, NULL, true);
		trace_cpuidle_debugfs_print("Yielding ipi_src_cpu\n");
	}
}
static int forced_idle_write(void *data, u64 val) static int forced_idle_write(void *data, u64 val)
{ {
struct cpuidle_state *idle_state = (struct cpuidle_state *) data; struct cpuidle_state *idle_state = (struct cpuidle_state *) data;
int ret = 0; int ret = 0;
u64 duration_ns = US_TO_NS(val); u64 duration_ns = US_TO_NS(val);
suspend_all_device_irqs(); forced_idle_entry(duration_ns, (u64) (idle_state->exit_latency_ns));
/* duration_ns, latency_ns */
play_idle_precise(duration_ns, (u64) (idle_state->exit_latency_ns));
resume_all_device_irqs();
return ret; return ret;
} }
/*
 * Validate the destination cpumasks, then hand @idle_params->work to the
 * coordinating CPU (ipi_src_cpu) via the high-priority system workqueue and
 * wait for it to finish. May be called from any CPU; the coordination work
 * itself runs on ipi_src_cpu and targets the configured destination masks.
 *
 * @data:        struct cpuidle_state * bound to the written debugfs file;
 *               its exit_latency_ns seeds the coordinated-idle parameters.
 * @val:         value written by userspace — currently unused; residency
 *               comes from the cpuidle_residency_ns_a/b debugfs knobs.
 * @idle_params: descriptor whose work member was initialised with
 *               coordinated_forced_idle_work_func().
 *
 * Returns 0 on success, -EINVAL when both destination masks are empty or
 * the queued work did not run.
 */
static int coordinated_sleep_setup_and_queue(void *data, u64 val,
		struct coordinated_sleep_struct *idle_params)
{
	struct cpuidle_state *idle_state = (struct cpuidle_state *) data;

	idle_params->exit_latency_ns = (u64) (idle_state->exit_latency_ns);

	if (cpumask_empty(&sleep_dest_a) && cpumask_empty(&sleep_dest_b)) {
		pr_info("Coordinated Wake Test: both cpumasks are empty\n");
		return -EINVAL;
	}

	queue_work_on(ipi_src_cpu, system_highpri_wq, &(idle_params->work));

	/*
	 * NOTE(review): flush_work() also returns false when the work already
	 * completed before the flush began, so a very fast test could be
	 * misreported as a failure — confirm acceptable for a debug knob.
	 */
	if (!flush_work(&(idle_params->work))) {
		pr_info("Coordinated Wake Test: test did not finish\n");
		return -EINVAL;
	}

	return 0;
}
/*
 * debugfs write handler: coordinated forced-idle entry only (no wake IPI).
 *
 * NOTE(review): do_coordinated_wakeup is a plain global with no locking;
 * concurrent writers to the two coordinated debugfs files race on it.
 */
static int coordinated_forced_idle_write(void *data, u64 val)
{
	coordination_params.do_coordinated_wakeup = false;
	return coordinated_sleep_setup_and_queue(data, val, &coordination_params);
}
/*
 * debugfs write handler: coordinated forced-idle entry followed by a
 * synchronized wake IPI to mask a (see coordinated_forced_idle_work_func).
 *
 * NOTE(review): shares the unsynchronized do_coordinated_wakeup flag with
 * coordinated_forced_idle_write(); concurrent writers race.
 */
static int ipi_wake_coordinated_forced_idle_write(void *data, u64 val)
{
	coordination_params.do_coordinated_wakeup = true;
	return coordinated_sleep_setup_and_queue(data, val, &coordination_params);
}
/*
 * Parse a userspace CPU list (e.g. "0-3,6") and merge it into the cpumask
 * attached to @file->private_data: bits are added when @set is true and
 * cleared otherwise. Returns @count on success or the negative parse error.
 */
static ssize_t parse_and_set_user_cpumask(struct file *file, const char __user *buf,
		size_t count, loff_t *pos, bool set)
{
	struct cpumask *target = (struct cpumask *) file->private_data;
	struct cpumask parsed;
	ssize_t ret;

	ret = cpumask_parselist_user(buf, count, &parsed);
	if (ret != 0)
		return ret;

	if (set)
		cpumask_or(target, target, &parsed);
	else
		cpumask_andnot(target, target, &parsed);

	return count;
}
/* debugfs write handler: add the parsed CPU list to the destination mask. */
static ssize_t set_ipi_dest_cpumask(struct file *file, const char __user *buf,
		size_t count, loff_t *pos)
{
	return parse_and_set_user_cpumask(file, buf, count, pos, true);
}
/* debugfs write handler: remove the parsed CPU list from the destination mask. */
static ssize_t clear_ipi_dest_cpu_mask(struct file *file, const char __user *buf,
		size_t count, loff_t *pos)
{
	return parse_and_set_user_cpumask(file, buf, count, pos, false);
}
/*
 * debugfs read handler: print the cpumask attached to @file->private_data
 * as a CPU list (e.g. "0-3,6\n").
 *
 * Uses scnprintf() rather than snprintf(): snprintf() returns the would-be
 * length on truncation, and passing a length larger than sizeof(pbuf) to
 * simple_read_from_buffer() would read past the stack buffer. scnprintf()
 * returns the number of bytes actually written.
 */
static ssize_t dest_cpumask_read(struct file *file, char __user *buf,
		size_t count, loff_t *ppos)
{
	int len;
	char pbuf[1024] = { 0 };
	struct cpumask *mask = (struct cpumask *) file->private_data;

	len = scnprintf(pbuf, sizeof(pbuf), "%*pbl\n", cpumask_pr_args(mask));

	return simple_read_from_buffer(buf, count, ppos, pbuf, len);
}
DEFINE_SIMPLE_ATTRIBUTE(idle_state_fops, NULL, forced_idle_write, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(idle_state_fops, NULL, forced_idle_write, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(coordinated_idle_state_fops, NULL, coordinated_forced_idle_write, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(ipi_wake_coordinated_idle_state_fops, NULL,
ipi_wake_coordinated_forced_idle_write, "%llu\n");
static const struct file_operations set_ipi_dest_cpumask_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = dest_cpumask_read,
.write = set_ipi_dest_cpumask,
.llseek = noop_llseek,
};
static const struct file_operations clear_ipi_dest_cpumask_fops = {
.owner = THIS_MODULE,
.open = simple_open,
.read = dest_cpumask_read,
.write = clear_ipi_dest_cpu_mask,
.llseek = noop_llseek,
};
static struct dentry *cpuidle_debugfs_node; static struct dentry *cpuidle_debugfs_node;
static int init_debugfs(void) static int init_debugfs(void)
{ {
int i; int i;
static struct dentry *coordinated_debugfs_node;
/* coordinated_ (12) + state-name (up to 7) + \0 (1) */
char coordinated_wake_file[20];
/* ipi_wake_coordinated_ (21) + state-name (up to 7) + \0 (1) */
char ipi_wake_file[35];
cpuidle_debugfs_node = debugfs_create_dir("cpuidle_debug", NULL); cpuidle_debugfs_node = debugfs_create_dir("cpuidle_debug", NULL);
if (!cpuidle_debugfs_node) if (!cpuidle_debugfs_node)
goto err_out; goto err_out;
coordinated_debugfs_node = debugfs_create_dir("coordinated_cpuidle", cpuidle_debugfs_node);
if (!coordinated_debugfs_node)
goto err_out;
debugfs_create_u64("coordinating_cpu", 0600, coordinated_debugfs_node, &ipi_src_cpu);
debugfs_create_file("set_cpuidle_dest_cpumask_a", 0600, coordinated_debugfs_node,
&sleep_dest_a, &set_ipi_dest_cpumask_fops);
debugfs_create_file("clear_cpuidle_dest_cpumask_a", 0600, coordinated_debugfs_node,
&sleep_dest_a, &clear_ipi_dest_cpumask_fops);
debugfs_create_u64("cpuidle_residency_ns_a", 0600, coordinated_debugfs_node,
&sleep_residency_ns_a);
debugfs_create_file("set_cpuidle_dest_cpumask_b", 0600, coordinated_debugfs_node,
&sleep_dest_b, &set_ipi_dest_cpumask_fops);
debugfs_create_file("clear_cpuidle_dest_cpumask_b", 0600, coordinated_debugfs_node,
&sleep_dest_b, &clear_ipi_dest_cpumask_fops);
debugfs_create_u64("cpuidle_residency_ns_b", 0600, coordinated_debugfs_node,
&sleep_residency_ns_b);
/* Initialize per-state knobs */
for (i = 0; i < drv->state_count; i++) { for (i = 0; i < drv->state_count; i++) {
snprintf(coordinated_wake_file, 20, "coordinated_%s", drv->states[i].name);
snprintf(ipi_wake_file, 35, "ipi_wake_coordinated_%s", drv->states[i].name);
debugfs_create_file(drv->states[i].name, 0200, debugfs_create_file(drv->states[i].name, 0200,
cpuidle_debugfs_node, &(drv->states[i]), &idle_state_fops); cpuidle_debugfs_node, &(drv->states[i]), &idle_state_fops);
debugfs_create_file(coordinated_wake_file, 0200,
coordinated_debugfs_node, &(drv->states[i]), &coordinated_idle_state_fops);
debugfs_create_file(ipi_wake_file, 0200,
coordinated_debugfs_node, &(drv->states[i]),
&ipi_wake_coordinated_idle_state_fops);
} }
return 0; return 0;
@@ -112,9 +368,18 @@ err_out:
static int __init cpuidle_debugfs_probe(void) static int __init cpuidle_debugfs_probe(void)
{ {
int cpu;
drv = cpuidle_get_driver(); drv = cpuidle_get_driver();
init_debugfs();
return 0; /* Init the workqueue functions */
INIT_WORK(&(coordination_params.work), coordinated_forced_idle_work_func);
for_each_possible_cpu(cpu) {
struct coordinated_sleep_struct *sleep_work = &per_cpu(enter_idle_work, cpu);
INIT_WORK(&(sleep_work->work), forced_idle_work_func);
}
return init_debugfs();
} }
static void __exit cpuidle_debugfs_remove(void) static void __exit cpuidle_debugfs_remove(void)

View File

@@ -0,0 +1,48 @@
/*
* cpuidle event logging to ftrace.
*
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cpuidle_debugfs_ftrace
#if !defined(_TRACE_CPUIDLE_DEBUGFS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_CPUIDLE_DEBUGFS_H
#include <linux/ktime.h>
#include <linux/tracepoint.h>
/*
 * Tracepoint emitting a free-form message from the cpuidle debugfs module.
 *
 * NOTE(review): only the pointer is recorded in the ring buffer, not the
 * string contents. This is safe for the current callers (all pass string
 * literals) but would print stale/garbage data for any transient buffer —
 * consider __string()/__assign_str()/__get_str() if non-literal messages
 * are ever traced.
 */
TRACE_EVENT(cpuidle_debugfs_print,
	TP_PROTO(
		const char *str
	),
	TP_ARGS(str),
	TP_STRUCT__entry(
		__field(const char *, str)
	),
	TP_fast_assign(
		__entry->str = str;
	),
	TP_printk("%s",
		__entry->str
	)
);
#endif /* _TRACE_CPUIDLE_DEBUGFS_H */
/* This part must be outside protection */
#include <trace/define_trace.h>