// SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_CPUS 12U #define MAX_CORES_PER_CLUSTER 4U #define ARI_TIMEOUT_MAX 2000U /* msec */ /* Register offsets for ARI request/results*/ #define ARI_REQUEST 0x0U #define ARI_REQUEST_EVENT_MASK 0x8U #define ARI_STATUS 0x10U #define ARI_REQUEST_DATA_LO 0x18U #define ARI_REQUEST_DATA_HI 0x20U #define ARI_RESPONSE_DATA_LO 0x28U #define ARI_RESPONSE_DATA_HI 0x30U /* Status values for the current request */ #define ARI_REQ_PENDING 1U #define ARI_REQ_ONGOING 3U #define ARI_REQUEST_VALID_BIT (1U << 8U) #define ARI_REQUEST_NS_BIT (1U << 31U) /* Write Enable bit */ #define CACHE_WAYS_WRITE_EN_BIT (1U << 15U) static void __iomem *ari_bar_array[MAX_CPUS]; static inline void ari_mmio_write_32(void __iomem *ari_base, u32 val, u32 reg) { writel(val, ari_base + reg); } static inline u32 ari_mmio_read_32(void __iomem *ari_base, u32 reg) { return readl(ari_base + reg); } static inline u32 ari_get_response_low(void __iomem *ari_base) { return ari_mmio_read_32(ari_base, ARI_RESPONSE_DATA_LO); } static inline u32 ari_get_response_high(void __iomem *ari_base) { return ari_mmio_read_32(ari_base, ARI_RESPONSE_DATA_HI); } static inline void ari_clobber_response(void __iomem *ari_base) { ari_mmio_write_32(ari_base, 0, ARI_RESPONSE_DATA_LO); ari_mmio_write_32(ari_base, 0, ARI_RESPONSE_DATA_HI); } static int32_t ari_send_request(void __iomem *ari_base, u32 evt_mask, u32 req, u32 lo, u32 hi) { uint32_t timeout = ARI_TIMEOUT_MAX; uint32_t status; int32_t ret = 0; /* clobber response */ ari_mmio_write_32(ari_base, 0, ARI_RESPONSE_DATA_LO); ari_mmio_write_32(ari_base, 0, ARI_RESPONSE_DATA_HI); /* send request */ ari_mmio_write_32(ari_base, lo, ARI_REQUEST_DATA_LO); ari_mmio_write_32(ari_base, hi, ARI_REQUEST_DATA_HI); ari_mmio_write_32(ari_base, evt_mask, ARI_REQUEST_EVENT_MASK); ari_mmio_write_32(ari_base, req | ARI_REQUEST_VALID_BIT | ARI_REQUEST_NS_BIT, ARI_REQUEST); while (timeout) { status = ari_mmio_read_32(ari_base, ARI_STATUS); if (!(status & (ARI_REQ_ONGOING | ARI_REQ_PENDING))) break; mdelay(1); timeout--; } if (!timeout) ret = -ETIMEDOUT; return ret; } static uint32_t get_ari_address_index(void) { uint64_t mpidr; uint32_t core_id, cluster_id; mpidr = read_cpuid_mpidr(); cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); return ((cluster_id * MAX_CORES_PER_CLUSTER) + core_id); } static int tegra23x_mce_read_versions(u32 *major, u32 *minor) { uint32_t cpu_idx; int32_t ret = 0; if (IS_ERR_OR_NULL(major) || IS_ERR_OR_NULL(minor)) return -EINVAL; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_VERSION, 0U, 0U); if (ret) return ret; *major = ari_get_response_low(ari_bar_array[cpu_idx]); *minor = ari_get_response_high(ari_bar_array[cpu_idx]); preempt_enable(); return 0; } /* * echo copies data from req_low to resp_low and * data from req_high to resp_high. */ static int tegra23x_mce_echo_data(u64 data, u64 *matched) { uint32_t cpu_idx; u32 input1 = (u32)(data & 0xFFFFFFFF); u32 input2 = (u32)(data >> 32); u64 out1, out2; int32_t ret = 0; if (IS_ERR_OR_NULL(matched)) return -EINVAL; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_ECHO, input1, input2); if (ret) return ret; out1 = (u64)ari_get_response_low(ari_bar_array[cpu_idx]); out2 = (u64)ari_get_response_high(ari_bar_array[cpu_idx]); *matched = ((out2 << 32) | out1); preempt_enable(); if (data == *matched) return 0; else return -ENOMSG; } static int tegra23x_mce_read_l4_cache_ways(u64 *value) { uint32_t cpu_idx; u64 out; int32_t ret = 0; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_CCPLEX_CACHE_CONTROL, 0U, 0U); if (ret) return ret; out = (u64)ari_get_response_low(ari_bar_array[cpu_idx]); *value = out; preempt_enable(); return 0; } static int tegra23x_mce_write_l4_cache_ways(u64 data, u64 *value) { uint32_t cpu_idx; u32 input = (u32)(data & 0x00001F1F); u64 out; int32_t ret = 0; if (IS_ERR_OR_NULL(value)) return -EINVAL; preempt_disable(); cpu_idx = get_ari_address_index(); input |= CACHE_WAYS_WRITE_EN_BIT; ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_CCPLEX_CACHE_CONTROL, input, 0U); if (ret) return ret; out = (u64)ari_get_response_low(ari_bar_array[cpu_idx]); *value = out; preempt_enable(); return 0; } static int tegra23x_mce_read_uncore_perfmon(u32 req, u32 *data) { uint32_t cpu_idx; u32 out_lo, out_hi; int32_t ret = 0; if (IS_ERR_OR_NULL(data)) return -EINVAL; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_PERFMON, req, 0U); if (ret) return ret; out_lo = ari_get_response_low(ari_bar_array[cpu_idx]); out_hi = ari_get_response_high(ari_bar_array[cpu_idx]); pr_debug("%s: read status = %u\n", __func__, out_lo); if (out_lo != 0) return -out_lo; *data = out_hi; preempt_enable(); return 0; } static int tegra23x_mce_write_uncore_perfmon(u32 req, u32 data) { uint32_t cpu_idx; u32 out_lo, out_hi; int32_t ret = 0; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_PERFMON, req, data); if (ret) return ret; out_lo = ari_get_response_low(ari_bar_array[cpu_idx]); out_hi = ari_get_response_high(ari_bar_array[cpu_idx]); pr_debug("%s: write status = %u\n", __func__, out_lo); if (out_lo != 0) return -out_lo; preempt_enable(); return 0; } static int tegra23x_mce_read_cstate_stats(u32 state, u64 *stats) { uint32_t cpu_idx; int32_t ret = 0; if (IS_ERR_OR_NULL(stats)) return -EINVAL; preempt_disable(); cpu_idx = get_ari_address_index(); ret = ari_send_request(ari_bar_array[cpu_idx], 0U, (u32)TEGRA_ARI_CSTATE_STAT_QUERY, state, 0U); if (ret) return ret; *stats = ari_get_response_low(ari_bar_array[cpu_idx]); preempt_enable(); return 0; } #ifdef CONFIG_DEBUG_FS static struct dentry *mce_debugfs; static int tegra23x_mce_versions_get(void *data, u64 *val) { u32 major = 0; u32 minor = 0; u64 version = 0; int ret; *val = 0; ret = tegra_mce_read_versions(&major, &minor); if (!ret) { version = (u64)major; *val = (version << 32) | minor; } return ret; } static int tegra23x_mce_echo_set(void *data, u64 val) { u64 matched = 0; int ret; ret = tegra_mce_echo_data(val, &matched); if (ret) return ret; return 0; } static void tegra_get_cpu_mpidr(void *mpidr) { *((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; } #define MCE_STAT_ID_SHIFT 16UL #define MAX_CSTATE_ENTRIES 3U #define MAX_CLUSTERS 3U struct cstats_req { char *name; uint32_t id; }; struct cstats_resp { uint32_t stats[3]; /* entries, entry_time_sum, exit_time_sum */ uint32_t log_id; }; static struct cstats_req core_req[MAX_CSTATE_ENTRIES] = { { "C7_ENTRIES", TEGRA_ARI_STAT_QUERY_C7_ENTRIES}, { "C7_ENTRY_TIME_SUM", TEGRA_ARI_STAT_QUERY_C7_ENTRY_TIME_SUM}, { "C7_EXIT_TIME_SUM", TEGRA_ARI_STAT_QUERY_C7_EXIT_TIME_SUM}, }; static struct cstats_req cluster_req[MAX_CSTATE_ENTRIES] = { { "CC7_ENTRIES", TEGRA_ARI_STAT_QUERY_CC7_ENTRIES}, { "CC7_ENTRY_TIME_SUM", TEGRA_ARI_STAT_QUERY_CC7_ENTRY_TIME_SUM}, { "CC7_EXIT_TIME_SUM", TEGRA_ARI_STAT_QUERY_CC7_EXIT_TIME_SUM}, }; static struct cstats_req system_req[MAX_CSTATE_ENTRIES] = { { "SC7_ENTRIES", TEGRA_ARI_STAT_QUERY_SC7_ENTRIES}, { "SC7_CCPLEX_ENTRY_TIME_SUM", TEGRA_ARI_STAT_QUERY_SC7_ENTRY_TIME_SUM}, { "SC7_CCPLEX_EXIT_TIME_SUM", TEGRA_ARI_STAT_QUERY_SC7_EXIT_TIME_SUM}, }; static int tegra23x_mce_dbg_cstats_show(struct seq_file *s, void *data) { u64 val; u32 mce_index; uint32_t cpu, mpidr_core, mpidr_cl, mpidr_lin, i, j; struct cstats_resp core_resp[MAX_CPUS] = { 0 }; struct cstats_resp cl_resp[MAX_CLUSTERS] = { 0 }; struct cstats_resp sys_resp = { 0 }; u64 mpidr = 0; for_each_online_cpu(cpu) { smp_call_function_single(cpu, tegra_get_cpu_mpidr, &mpidr, true); mpidr_cl = MPIDR_AFFINITY_LEVEL(mpidr, 2); mpidr_core = MPIDR_AFFINITY_LEVEL(mpidr, 1); mpidr_lin = ((mpidr_cl * MAX_CORES_PER_CLUSTER) + mpidr_core); /* core cstats */ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { mce_index = (core_req[i].id << MCE_STAT_ID_SHIFT) + mpidr_lin; if (tegra23x_mce_read_cstate_stats(mce_index, &val)) pr_err("mce: failed to read cstat: %x\n", mce_index); else { core_resp[mpidr_lin].stats[i] = val; core_resp[mpidr_lin].log_id = cpu; } } /* * cluster cstats * for multiple cores in the same cluster we end up calling * more than once. Optimize this later */ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { mce_index = (cluster_req[i].id << MCE_STAT_ID_SHIFT) + mpidr_cl; if (tegra23x_mce_read_cstate_stats(mce_index, &val)) pr_err("mce: failed to read cstat: %x\n", mce_index); else cl_resp[mpidr_cl].stats[i] = val; } } /* system cstats */ for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { mce_index = (system_req[i].id << MCE_STAT_ID_SHIFT); if (tegra23x_mce_read_cstate_stats(mce_index, &val)) pr_err("mce: failed to read cstat: %x\n", mce_index); else sys_resp.stats[i] = val; } seq_puts(s, "System Power States\n"); seq_puts(s, "---------------------------------------------------\n"); seq_printf(s, "%-25s%-15s\n", "name", "count/time"); seq_puts(s, "---------------------------------------------------\n"); for (i = 0; i < MAX_CSTATE_ENTRIES; i++) seq_printf(s, "%-25s%-20u\n", system_req[i].name, sys_resp.stats[i]); seq_puts(s, "\nCluster Power States\n"); seq_puts(s, "---------------------------------------------------\n"); seq_printf(s, "%-25s%-15s%-15s\n", "name", "phy-id", "count/time"); seq_puts(s, "---------------------------------------------------\n"); for (j = 0; j < MAX_CLUSTERS; j++) { for (i = 0; i < MAX_CSTATE_ENTRIES; i++) seq_printf(s, "%-25s%-15d%-20u\n", cluster_req[i].name, j, cl_resp[j].stats[i]); } seq_puts(s, "\nCore Power States\n"); seq_puts(s, "-------------------------------------------------------------------\n"); seq_printf(s, "%-25s%-15s%-15s%-15s\n", "name", "mpidr-lin", "log-id", "count/time"); seq_puts(s, "-------------------------------------------------------------------\n"); for (j = 0; j < MAX_CPUS; j++) { for (i = 0; i < MAX_CSTATE_ENTRIES; i++) seq_printf(s, "%-25s%-15d%-15u%-20u\n", core_req[i].name, j, core_resp[j].log_id, core_resp[j].stats[i]); } return 0; } static int tegra23x_mce_dbg_cstats_open(struct inode *inode, struct file *file) { int (*f)(struct seq_file *s, void *data); f = tegra23x_mce_dbg_cstats_show; return single_open(file, f, inode->i_private); } static const struct file_operations tegra23x_mce_cstats_fops = { .open = tegra23x_mce_dbg_cstats_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; DEFINE_SIMPLE_ATTRIBUTE(tegra23x_mce_versions_fops, tegra23x_mce_versions_get, NULL, "%llx\n"); DEFINE_SIMPLE_ATTRIBUTE(tegra23x_mce_echo_fops, NULL, tegra23x_mce_echo_set, "%llx\n"); struct debugfs_entry { const char *name; const struct file_operations *fops; mode_t mode; }; /* Make sure to put an NULL entry at the end of each group */ static struct debugfs_entry tegra23x_mce_attrs[] = { { "versions", &tegra23x_mce_versions_fops, 0444 }, { "echo", &tegra23x_mce_echo_fops, 0200 }, { "cstats", &tegra23x_mce_cstats_fops, 0444 }, { NULL, NULL, 0 } }; static struct debugfs_entry *tegra_mce_attrs = tegra23x_mce_attrs; static int tegra23x_mce_init(void) { struct debugfs_entry *fent; struct dentry *dent; int ret; mce_debugfs = debugfs_create_dir("tegra_mce", NULL); if (!mce_debugfs) return -ENOMEM; for (fent = tegra_mce_attrs; fent->name; fent++) { dent = debugfs_create_file(fent->name, fent->mode, mce_debugfs, NULL, fent->fops); if (IS_ERR_OR_NULL(dent)) { ret = dent ? PTR_ERR(dent) : -EINVAL; pr_err("%s: failed to create debugfs (%s): %d\n", __func__, fent->name, ret); goto err; } } pr_debug("%s: init finished\n", __func__); return 0; err: debugfs_remove_recursive(mce_debugfs); return ret; } static void tegra23x_mce_exit(void) { debugfs_remove_recursive(mce_debugfs); } #endif /* CONFIG_DEBUG_FS */ static struct tegra_mce_ops t23x_mce_ops = { .read_versions = tegra23x_mce_read_versions, .read_l3_cache_ways = tegra23x_mce_read_l4_cache_ways, .write_l3_cache_ways = tegra23x_mce_write_l4_cache_ways, .echo_data = tegra23x_mce_echo_data, .read_uncore_perfmon = tegra23x_mce_read_uncore_perfmon, .write_uncore_perfmon = tegra23x_mce_write_uncore_perfmon, .read_cstate_stats = tegra23x_mce_read_cstate_stats, }; static int t23x_mce_probe(struct platform_device *pdev) { unsigned int cpu; struct resource *res; /* this ARI NS mapping applies to Split, Lock-step and FS */ for (cpu = 0; cpu < MAX_CPUS; cpu++) { res = platform_get_resource(pdev, IORESOURCE_MEM, cpu); ari_bar_array[cpu] = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ari_bar_array[cpu])) { dev_err(&pdev->dev, "mapping ARI failed for %d\n", cpu); return PTR_ERR(ari_bar_array[cpu]); } } tegra_mce_set_ops(&t23x_mce_ops); #ifdef CONFIG_DEBUG_FS tegra23x_mce_init(); #endif return 0; } static int t23x_mce_remove(struct platform_device *pdev) { #ifdef CONFIG_DEBUG_FS tegra23x_mce_exit(); #endif return 0; } static const struct of_device_id t23x_mce_of_match[] = { { .compatible = "nvidia,t23x-mce", .data = NULL }, { }, }; MODULE_DEVICE_TABLE(of, t23x_mce_of_match); static struct platform_driver t23x_mce_driver = { .probe = t23x_mce_probe, .remove = t23x_mce_remove, .driver = { .owner = THIS_MODULE, .name = "t23x-mce", .of_match_table = of_match_ptr(t23x_mce_of_match), }, }; module_platform_driver(t23x_mce_driver); MODULE_DESCRIPTION("NVIDIA Tegra23x MCE driver"); MODULE_AUTHOR("NVIDIA Corporation"); MODULE_LICENSE("GPL v2");