diff --git a/drivers/platform/tegra/rtcpu/hsp-mailbox-client.c b/drivers/platform/tegra/rtcpu/hsp-mailbox-client.c index 2e74c99a..c218d47d 100644 --- a/drivers/platform/tegra/rtcpu/hsp-mailbox-client.c +++ b/drivers/platform/tegra/rtcpu/hsp-mailbox-client.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. #include "linux/tegra-hsp-combo.h" @@ -39,6 +39,8 @@ struct camrtc_hsp { wait_queue_head_t response_waitq; atomic_t response; long timeout; + /* callback function for panic message */ + void (*panic_callback)(struct device *dev); }; struct camrtc_hsp_op { @@ -54,6 +56,35 @@ struct camrtc_hsp_op { int (*set_operating_point)(struct camrtc_hsp *, u32 operating_point, long *timeout); }; +/** + * @brief Registers a callback function to be called when a panic message is received + * + * This function registers a callback function that will be called when a panic message + * is received from the RCE.
+ * - Validates the HSP context pointer (logged without a device, since none is available when it is NULL) + * - Sets the panic_callback function pointer + * + * @param[in] camhsp Pointer to the camera HSP context + * Valid value: non-NULL + * @param[in] panic_callback Function to be called when a panic message is received + * Valid value: non-NULL function pointer or NULL to clear + * + * @retval 0 On successful registration + * @retval -EINVAL If the HSP context is NULL + */ +int camrtc_hsp_set_panic_callback(struct camrtc_hsp *camhsp, + void (*panic_callback)(struct device *dev)) +{ + if (camhsp == NULL) { + pr_err("%s: camhsp is NULL!\n", __func__); + return -EINVAL; + } + + camhsp->panic_callback = panic_callback; + return 0; +} +EXPORT_SYMBOL(camrtc_hsp_set_panic_callback); + /** * @brief Sends a request message over the HSP mailbox * @@ -209,9 +240,15 @@ static void camrtc_hsp_rx_full_notify(mbox_client *cl, void *data) if (CAMRTC_HSP_MSG_ID(msg) == CAMRTC_HSP_IRQ) { /* We are done here */ + } else if (CAMRTC_HSP_MSG_ID(msg) == CAMRTC_HSP_PANIC) { + dev_err(&camhsp->dev, "%s: receive CAMRTC_HSP_PANIC message!\n", __func__); + if (camhsp->panic_callback != NULL) { + camhsp->panic_callback(camhsp->dev.parent); + } else { + dev_warn(&camhsp->dev, "%s: No panic callback function is registered.\n", __func__); + } } else if (CAMRTC_HSP_MSG_ID(msg) < CAMRTC_HSP_HELLO) { /* Rest of the unidirectional messages are now ignored */ - dev_info(&camhsp->dev, "unknown message 0x%08x\n", msg); } else { atomic_set(&camhsp->response, msg); wake_up(&camhsp->response_waitq); @@ -1315,6 +1352,7 @@ struct camrtc_hsp *camrtc_hsp_create( init_waitqueue_head(&camhsp->response_waitq); init_completion(&camhsp->emptied); atomic_set(&camhsp->response, -1); + camhsp->panic_callback = NULL; camhsp->dev.type = &camrtc_hsp_combo_dev_type; camhsp->dev.release = camrtc_hsp_combo_dev_release; diff --git a/drivers/platform/tegra/rtcpu/tegra-camera-rtcpu-base.c b/drivers/platform/tegra/rtcpu/tegra-camera-rtcpu-base.c index 1943da5b..37394c19
100644 --- a/drivers/platform/tegra/rtcpu/tegra-camera-rtcpu-base.c +++ b/drivers/platform/tegra/rtcpu/tegra-camera-rtcpu-base.c @@ -1282,6 +1282,44 @@ static int tegra_cam_rtcpu_runtime_idle(struct device *dev) return 0; } +/** + * @brief Callback function triggered upon receiving CAMRTC_HSP_PANIC message. + * + * This function is registered with the HSP mailbox client. When an RCE panic + * occurs, this callback retrieves the RTCPU tracer associated with the device + * and flushes the snapshot portion of the trace buffer to capture RCE state + * at the time of panic. + * + * Checks for NULL input parameters (`dev`, `rtcpu`, `tracer`) before proceeding. + * NOTE(review): invoked from the HSP mailbox receive notifier; the mailbox client API documents rx_callback as atomic, while rtcpu_trace_snapshot() takes a mutex -- confirm the calling context or defer the dump to a work item. + * @param[in] dev Pointer to the parent device associated with the HSP client. + * Must not be NULL. Used to retrieve driver data. + */ +void rtcpu_trace_panic_callback(struct device *dev) +{ + struct tegra_cam_rtcpu *rtcpu = NULL; + struct tegra_rtcpu_trace *tracer = NULL; + if (dev == NULL) { + pr_err("%s: input dev handle is null\n", __func__); + return; + } + + rtcpu = dev_get_drvdata(dev); + if (rtcpu == NULL) { + dev_err(dev, "%s: input rtcpu handle is null\n", __func__); + return; + } + + tracer = rtcpu->tracer; + if (tracer == NULL) { + dev_err(dev, "%s: input tracer handle is null\n", __func__); + return; + } + + rtcpu_trace_snapshot(tracer); +} +EXPORT_SYMBOL(rtcpu_trace_panic_callback); + /** * @brief Initialize the HSP for the camera RTCPU * @@ -1312,9 +1350,19 @@ static int tegra_camrtc_hsp_init(struct device *dev) if (IS_ERR(rtcpu->hsp)) { err = PTR_ERR(rtcpu->hsp); rtcpu->hsp = NULL; + dev_err(dev, "%s: failed to create hsp, err=%d\n", __func__, err); return err; } + /* Register panic callback to capture trace on RCE panic */ + if (rtcpu->hsp && rtcpu->tracer) { + err = camrtc_hsp_set_panic_callback(rtcpu->hsp, rtcpu_trace_panic_callback); + if (err < 0) + dev_err(dev, "%s: failed to set panic callback, err=%d\n", __func__, err); + } else { + dev_err(dev, "%s: cannot register RCE panic
callback.\n", __func__); + } + return 0; } diff --git a/drivers/platform/tegra/rtcpu/tegra-rtcpu-trace.c b/drivers/platform/tegra/rtcpu/tegra-rtcpu-trace.c index 36db483d..5ac6802d 100644 --- a/drivers/platform/tegra/rtcpu/tegra-rtcpu-trace.c +++ b/drivers/platform/tegra/rtcpu/tegra-rtcpu-trace.c @@ -73,19 +73,23 @@ struct tegra_rtcpu_trace { /* pointers to each block */ void *exceptions_base; struct camrtc_event_struct *events; + struct camrtc_event_struct *snapshot_events; dma_addr_t dma_handle_pointers; dma_addr_t dma_handle_exceptions; dma_addr_t dma_handle_events; + dma_addr_t dma_handle_snapshots; /* limit */ u32 exception_entries; u32 event_entries; + u32 snapshot_entries; /* exception pointer */ u32 exception_last_idx; /* last pointer */ u32 event_last_idx; + u32 snapshot_last_idx; /* worker */ struct delayed_work work; @@ -94,6 +98,7 @@ struct tegra_rtcpu_trace { /* statistics */ u32 n_exceptions; u64 n_events; + u32 n_snapshots; /* copy of the latest exception and event */ char last_exception_str[EXCEPTION_STR_LENGTH]; @@ -172,7 +177,7 @@ static int rtcpu_trace_setup_memory(struct tegra_rtcpu_trace *tracer) ret = of_parse_phandle_with_fixed_args(dev->of_node, NV(trace), 3, 0, &reg_spec); if (unlikely(ret != 0)) { - dev_err(dev, "Cannot find trace entry\n"); + dev_err(dev, "%s: cannot find trace entry\n", __func__); return -EINVAL; } @@ -220,6 +225,7 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer) { u64 add_value = 0; u32 sub_value = 0; + u32 CAMRTC_TRACE_SNAPSHOT_OFFSET = 0; if (unlikely(check_add_overflow(tracer->dma_handle, (u64)(offsetof(struct camrtc_trace_memory_header, @@ -232,6 +238,8 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer) /* memory map */ tracer->dma_handle_pointers = add_value; + + // exception section setup tracer->exceptions_base = tracer->trace_memory + CAMRTC_TRACE_EXCEPTION_OFFSET; tracer->exception_entries = 7; @@ -244,18 +252,48 @@ static void rtcpu_trace_init_memory(struct
tegra_rtcpu_trace *tracer) } tracer->dma_handle_exceptions = add_value; - tracer->events = tracer->trace_memory + CAMRTC_TRACE_EVENT_OFFSET; - if (unlikely(check_sub_overflow(tracer->trace_memory_size, + // event section setup + tracer->events = tracer->trace_memory + CAMRTC_TRACE_EVENT_OFFSET; + CAMRTC_TRACE_SNAPSHOT_OFFSET = + tracer->trace_memory_size - (CAMRTC_TRACE_SNAPSHOT_ENTRIES * CAMRTC_TRACE_EVENT_SIZE); + if (unlikely(check_sub_overflow(CAMRTC_TRACE_SNAPSHOT_OFFSET, CAMRTC_TRACE_EVENT_OFFSET, &sub_value))) { dev_err(tracer->dev, "%s:trace_memory_size failed due to an overflow\n", __func__); return; } - tracer->event_entries = sub_value / CAMRTC_TRACE_EVENT_SIZE; + + if (unlikely(check_add_overflow(tracer->dma_handle, + (u64)(CAMRTC_TRACE_EVENT_OFFSET), &add_value))) { + dev_err(tracer->dev, + "%s:dma_handle failed due to an overflow\n", __func__); + return; + } tracer->dma_handle_events = add_value; + // snapshot section setup + dev_dbg(tracer->dev, "%s: setup snapshot section\n", __func__); + tracer->snapshot_events = tracer->trace_memory + CAMRTC_TRACE_SNAPSHOT_OFFSET; + + tracer->snapshot_entries = CAMRTC_TRACE_SNAPSHOT_ENTRIES; + if (unlikely(check_add_overflow(tracer->dma_handle, + (u64)(CAMRTC_TRACE_SNAPSHOT_OFFSET), &add_value))) { + dev_err(tracer->dev, + "%s:dma_handle for snapshots failed due to an overflow\n", __func__); + return; + } + tracer->dma_handle_snapshots = add_value; + + dev_dbg(tracer->dev, "%s: exception section: offset=%x, size=%u, entries=%u\n", __func__, + CAMRTC_TRACE_EXCEPTION_OFFSET, CAMRTC_TRACE_EXCEPTION_SIZE, tracer->exception_entries); + dev_dbg(tracer->dev, "%s: event section: offset=%x, size=%u, entries=%u\n", __func__, + CAMRTC_TRACE_EVENT_OFFSET, CAMRTC_TRACE_EVENT_SIZE, tracer->event_entries); + dev_dbg(tracer->dev, "%s: snapshot section: offset=%x, size=%u, entries=%u\n", __func__, + CAMRTC_TRACE_SNAPSHOT_OFFSET, CAMRTC_TRACE_EVENT_SIZE, tracer->snapshot_entries); + dev_dbg(tracer->dev, "%s: total trace memory 
size=%u\n", __func__, tracer->trace_memory_size); + { struct camrtc_trace_memory_header header = { .tlv.tag = CAMRTC_TAG_NV_TRCON, @@ -267,6 +305,9 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer) .event_offset = CAMRTC_TRACE_EVENT_OFFSET, .event_size = CAMRTC_TRACE_EVENT_SIZE, .event_entries = tracer->event_entries, + .snapshot_offset = CAMRTC_TRACE_SNAPSHOT_OFFSET, + .snapshot_size = CAMRTC_TRACE_EVENT_SIZE, + .snapshot_entries = tracer->snapshot_entries, }; memcpy(tracer->trace_memory, &header, sizeof(header)); @@ -1872,6 +1913,90 @@ static inline void rtcpu_trace_events(struct tegra_rtcpu_trace *tracer) tracer->copy_last_event = *last_event; } +/** + * @brief Processes RTCPU snapshot trace events from shared memory. + * + * This function reads and processes snapshot trace events recorded by the RTCPU + * into a dedicated circular buffer in shared memory. It is similar to + * rtcpu_trace_events but operates on the snapshot buffer. + * + * - Acquires the tracer mutex lock. + * - Reads the current snapshot write index (@ref snapshot_next_idx) from the + * shared memory header. + * - Compares it with the last processed index (@ref tracer->snapshot_last_idx) + * to find new events. + * - Returns early if the tracer is NULL or no new events are found. + * - Validates the new index using @ref array_index_nospec. + * - Invalidates CPU cache for the relevant memory range using + * @ref rtcpu_trace_invalidate_entries to ensure visibility of RTCPU writes. + * - Iterates through new events from the old index up to (but not including) + * the new index, handling potential buffer wrap-around. + * - For each event: + * - Validates the index using @ref array_index_nospec. + * - Gets a pointer to the event structure in the snapshot buffer. + * - Processes the event using @ref rtcpu_trace_event. + * - Increments the snapshot event counter (@ref tracer->n_snapshots). + * - Advances the index, handling wrap-around. 
+ * - Updates the last processed snapshot index (@ref tracer->snapshot_last_idx). + * - Releases the tracer mutex lock. + * + * @param[in/out] tracer Pointer to the tegra_rtcpu_trace structure. + * Valid Range: Non-NULL pointer. + */ +void rtcpu_trace_snapshot(struct tegra_rtcpu_trace *tracer) +{ + const struct camrtc_trace_memory_header *header = NULL; + u32 old_next = 0U; + u32 new_next = 0U; + struct camrtc_event_struct *event = NULL; + + if (tracer == NULL) + return; + + mutex_lock(&tracer->lock); + + header = tracer->trace_memory; + old_next = tracer->snapshot_last_idx; + new_next = header->snapshot_next_idx; + + if (new_next >= tracer->snapshot_entries) { + dev_warn_ratelimited(tracer->dev, + "trace entry %u outside range 0..%u\n", + new_next, tracer->snapshot_entries - 1); + mutex_unlock(&tracer->lock); + return; + } + + new_next = array_index_nospec(new_next, tracer->snapshot_entries); + + if (old_next == new_next) { + mutex_unlock(&tracer->lock); + return; + } + + rtcpu_trace_invalidate_entries(tracer, + tracer->dma_handle_snapshots, + old_next, new_next, + CAMRTC_TRACE_EVENT_SIZE, + tracer->snapshot_entries); + + dev_err(tracer->dev, "%s: dump snapshot start at %u, end at %u\n", + __func__, old_next, new_next); + while (old_next != new_next) { + old_next = array_index_nospec(old_next, tracer->snapshot_entries); + event = &tracer->snapshot_events[old_next]; + rtcpu_trace_event(tracer, event); + tracer->n_snapshots = wrap_add_u32(tracer->n_snapshots, 1U); + old_next = wrap_add_u32(old_next, 1U); + if (old_next == tracer->snapshot_entries) + old_next = 0; + } + + tracer->snapshot_last_idx = new_next; + mutex_unlock(&tracer->lock); +} +EXPORT_SYMBOL(rtcpu_trace_snapshot); + /** * @brief Flushes the RTCPU trace buffer * @@ -2441,8 +2566,8 @@ static int rtcpu_trace_debugfs_stats_read( { struct tegra_rtcpu_trace *tracer = file->private; - seq_printf(file, "Exceptions: %u\nEvents: %llu\n", - tracer->n_exceptions, tracer->n_events); + seq_printf(file, "Exceptions: 
%u\nEvents: %llu\nSnapshots: %u\n", + tracer->n_exceptions, tracer->n_events, tracer->n_snapshots); return 0; } @@ -2673,8 +2798,10 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev, int ret; tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); - if (unlikely(tracer == NULL)) + if (unlikely(tracer == NULL)) { + dev_err(dev, "%s: failed to allocate tracer\n", __func__); return NULL; + } tracer->dev = dev; mutex_init(&tracer->lock); @@ -2682,7 +2809,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev, /* Get the trace memory */ ret = rtcpu_trace_setup_memory(tracer); if (ret) { - dev_err(dev, "Trace memory setup failed: %d\n", ret); + dev_err(dev, "%s: failed to setup trace memory, err=%d\n", __func__, ret); kfree(tracer); return NULL; } @@ -2729,7 +2856,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev, /* Worker */ param = WORK_INTERVAL_DEFAULT; if (of_property_read_u32(tracer->of_node, NV(interval-ms), &param)) { - dev_err(dev, "interval-ms property not present\n"); + dev_err(dev, "%s: interval-ms property not present\n", __func__); kfree(tracer); return NULL; } @@ -2740,7 +2867,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev, tracer->log_prefix = "[RTCPU]"; if (of_property_read_string(tracer->of_node, NV(log-prefix), &tracer->log_prefix)) { - dev_err(dev, "RTCPU property not present\n"); + dev_err(dev, "%s: log-prefix property not present\n", __func__); kfree(tracer); return NULL; } @@ -2756,7 +2883,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev, ret = raw_trace_node_drv_register(tracer); if (ret) { - dev_err(dev, "Failed to register device node\n"); + dev_err(dev, "%s: failed to register device node, err=%d\n", __func__, ret); kfree(tracer); return NULL; } diff --git a/include/linux/tegra-hsp-combo.h b/include/linux/tegra-hsp-combo.h index adecb2aa..12ccbc18 100644 --- a/include/linux/tegra-hsp-combo.h +++ b/include/linux/tegra-hsp-combo.h @@ -1,6 +1,6
@@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #ifndef INCLUDE_RTCPU_HSP_COMBO_H @@ -32,5 +32,8 @@ int camrtc_hsp_get_fw_hash(struct camrtc_hsp *camhsp, int camrtc_hsp_set_operating_point(struct camrtc_hsp *camhsp, uint32_t operating_point); +int camrtc_hsp_set_panic_callback(struct camrtc_hsp *camhsp, + void (*panic_callback)(struct device *dev)); + #endif /* INCLUDE_RTCPU_HSP_COMBO_H */ diff --git a/include/linux/tegra-rtcpu-trace.h b/include/linux/tegra-rtcpu-trace.h index 96884ad0..8e34ac15 100644 --- a/include/linux/tegra-rtcpu-trace.h +++ b/include/linux/tegra-rtcpu-trace.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #ifndef _LINUX_TEGRA_RTCPU_TRACE_H_ @@ -17,5 +17,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create( int tegra_rtcpu_trace_boot_sync(struct tegra_rtcpu_trace *tracer); void tegra_rtcpu_trace_flush(struct tegra_rtcpu_trace *tracer); void tegra_rtcpu_trace_destroy(struct tegra_rtcpu_trace *tracer); +void rtcpu_trace_snapshot(struct tegra_rtcpu_trace *tracer); +void rtcpu_trace_panic_callback(struct device *dev); #endif