camera: dump out RCE snapshot on CAMRTC_HSP_PANIC

- implemented a panic callback function which dumps out
  the snapshot section of the RCE trace buffer.
- registered the panic callback function with hsp handle
  during handle init time.
- when the CAMRTC_HSP_PANIC message is received from RCE, trigger
  the panic callback function to dump out the RCE snapshot.

Jira CAMERASW-32243

Change-Id: I523a0b51637a6cf1091d578195c75090b52ffcd7
Signed-off-by: yizhou <yizhou@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3341536
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Shiva Dubey <sdubey@nvidia.com>
Reviewed-by: Vincent Chung <vincentc@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
yizhou
2025-04-15 23:37:19 +00:00
committed by Jon Hunter
parent d68f89d225
commit 4736728ec1
5 changed files with 233 additions and 15 deletions

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "linux/tegra-hsp-combo.h" #include "linux/tegra-hsp-combo.h"
@@ -39,6 +39,8 @@ struct camrtc_hsp {
wait_queue_head_t response_waitq; wait_queue_head_t response_waitq;
atomic_t response; atomic_t response;
long timeout; long timeout;
/* callback function for panic message */
void (*panic_callback)(struct device *dev);
}; };
struct camrtc_hsp_op { struct camrtc_hsp_op {
@@ -54,6 +56,35 @@ struct camrtc_hsp_op {
int (*set_operating_point)(struct camrtc_hsp *, u32 operating_point, long *timeout); int (*set_operating_point)(struct camrtc_hsp *, u32 operating_point, long *timeout);
}; };
/**
 * @brief Registers a callback function to be called when a panic message is received
 *
 * This function registers a callback function that will be called when a panic message
 * is received from the RCE.
 * - Validates the HSP context pointer
 * - Sets the panic_callback function pointer
 *
 * @param[in] camhsp Pointer to the camera HSP context
 *                   Valid value: non-NULL
 * @param[in] panic_callback Function to be called when a panic message is received
 *                   Valid value: non-NULL function pointer or NULL to clear
 *
 * @retval 0       On successful registration
 * @retval -EINVAL If the HSP context is NULL
 */
int camrtc_hsp_set_panic_callback(struct camrtc_hsp *camhsp,
		void (*panic_callback)(struct device *dev))
{
	if (camhsp == NULL) {
		/*
		 * camhsp is NULL here: evaluating &camhsp->dev for dev_err()
		 * would dereference a NULL pointer, so use pr_err() instead.
		 */
		pr_err("%s: camhsp is NULL!\n", __func__);
		return -EINVAL;
	}

	camhsp->panic_callback = panic_callback;

	return 0;
}
EXPORT_SYMBOL(camrtc_hsp_set_panic_callback);
/** /**
* @brief Sends a request message over the HSP mailbox * @brief Sends a request message over the HSP mailbox
* *
@@ -209,9 +240,15 @@ static void camrtc_hsp_rx_full_notify(mbox_client *cl, void *data)
if (CAMRTC_HSP_MSG_ID(msg) == CAMRTC_HSP_IRQ) { if (CAMRTC_HSP_MSG_ID(msg) == CAMRTC_HSP_IRQ) {
/* We are done here */ /* We are done here */
} else if (CAMRTC_HSP_MSG_ID(msg) == CAMRTC_HSP_PANIC) {
dev_err(&camhsp->dev, "%s: receive CAMRTC_HSP_PANIC message!\n", __func__);
if (camhsp->panic_callback != NULL) {
camhsp->panic_callback(camhsp->dev.parent);
} else {
dev_warn(&camhsp->dev, "%s: No panic callback function is registered.\n", __func__);
}
} else if (CAMRTC_HSP_MSG_ID(msg) < CAMRTC_HSP_HELLO) { } else if (CAMRTC_HSP_MSG_ID(msg) < CAMRTC_HSP_HELLO) {
/* Rest of the unidirectional messages are now ignored */ /* Rest of the unidirectional messages are now ignored */
dev_info(&camhsp->dev, "unknown message 0x%08x\n", msg);
} else { } else {
atomic_set(&camhsp->response, msg); atomic_set(&camhsp->response, msg);
wake_up(&camhsp->response_waitq); wake_up(&camhsp->response_waitq);
@@ -1315,6 +1352,7 @@ struct camrtc_hsp *camrtc_hsp_create(
init_waitqueue_head(&camhsp->response_waitq); init_waitqueue_head(&camhsp->response_waitq);
init_completion(&camhsp->emptied); init_completion(&camhsp->emptied);
atomic_set(&camhsp->response, -1); atomic_set(&camhsp->response, -1);
camhsp->panic_callback = NULL;
camhsp->dev.type = &camrtc_hsp_combo_dev_type; camhsp->dev.type = &camrtc_hsp_combo_dev_type;
camhsp->dev.release = camrtc_hsp_combo_dev_release; camhsp->dev.release = camrtc_hsp_combo_dev_release;

View File

@@ -1282,6 +1282,44 @@ static int tegra_cam_rtcpu_runtime_idle(struct device *dev)
return 0; return 0;
} }
/**
 * @brief Callback function triggered upon receiving CAMRTC_HSP_PANIC message.
 *
 * This function is registered with the HSP mailbox client. When an RCE panic
 * occurs, this callback retrieves the RTCPU tracer associated with the device
 * and flushes the snapshot portion of the trace buffer to capture RCE state
 * at the time of panic.
 *
 * Checks for NULL input parameters (`dev`, `rtcpu`, `tracer`) before proceeding.
 *
 * @param[in] dev Pointer to the parent device associated with the HSP client.
 *                Must not be NULL. Used to retrieve driver data.
 */
void rtcpu_trace_panic_callback(struct device *dev)
{
	struct tegra_cam_rtcpu *rtcpu = NULL;
	struct tegra_rtcpu_trace *tracer = NULL;

	if (dev == NULL) {
		/* dev is NULL in this branch, so dev_err(dev, ...) must not be used */
		pr_err("%s: input dev handle is null\n", __func__);
		return;
	}

	rtcpu = dev_get_drvdata(dev);
	if (rtcpu == NULL) {
		dev_err(dev, "%s: input rtcpu handle is null\n", __func__);
		return;
	}

	tracer = rtcpu->tracer;
	if (tracer == NULL) {
		dev_err(dev, "%s: input tracer handle is null\n", __func__);
		return;
	}

	/* Dump the snapshot section of the trace buffer for post-mortem analysis */
	rtcpu_trace_snapshot(tracer);
}
EXPORT_SYMBOL(rtcpu_trace_panic_callback);
/** /**
* @brief Initialize the HSP for the camera RTCPU * @brief Initialize the HSP for the camera RTCPU
* *
@@ -1312,9 +1350,19 @@ static int tegra_camrtc_hsp_init(struct device *dev)
if (IS_ERR(rtcpu->hsp)) { if (IS_ERR(rtcpu->hsp)) {
err = PTR_ERR(rtcpu->hsp); err = PTR_ERR(rtcpu->hsp);
rtcpu->hsp = NULL; rtcpu->hsp = NULL;
dev_err(dev, "%s: failed to create hsp, err=%d\n", __func__, err);
return err; return err;
} }
/* Register panic callback to capture trace on RCE panic */
if (rtcpu->hsp && rtcpu->tracer) {
err = camrtc_hsp_set_panic_callback(rtcpu->hsp, rtcpu_trace_panic_callback);
if (err < 0)
dev_err(dev, "%s: failed to set panic callback, err=%d\n", __func__, err);
} else {
dev_err(dev, "%s: cannot register RCE panic callback.\n", __func__);
}
return 0; return 0;
} }

View File

@@ -73,19 +73,23 @@ struct tegra_rtcpu_trace {
/* pointers to each block */ /* pointers to each block */
void *exceptions_base; void *exceptions_base;
struct camrtc_event_struct *events; struct camrtc_event_struct *events;
struct camrtc_event_struct *snapshot_events;
dma_addr_t dma_handle_pointers; dma_addr_t dma_handle_pointers;
dma_addr_t dma_handle_exceptions; dma_addr_t dma_handle_exceptions;
dma_addr_t dma_handle_events; dma_addr_t dma_handle_events;
dma_addr_t dma_handle_snapshots;
/* limit */ /* limit */
u32 exception_entries; u32 exception_entries;
u32 event_entries; u32 event_entries;
u32 snapshot_entries;
/* exception pointer */ /* exception pointer */
u32 exception_last_idx; u32 exception_last_idx;
/* last pointer */ /* last pointer */
u32 event_last_idx; u32 event_last_idx;
u32 snapshot_last_idx;
/* worker */ /* worker */
struct delayed_work work; struct delayed_work work;
@@ -94,6 +98,7 @@ struct tegra_rtcpu_trace {
/* statistics */ /* statistics */
u32 n_exceptions; u32 n_exceptions;
u64 n_events; u64 n_events;
u32 n_snapshots;
/* copy of the latest exception and event */ /* copy of the latest exception and event */
char last_exception_str[EXCEPTION_STR_LENGTH]; char last_exception_str[EXCEPTION_STR_LENGTH];
@@ -172,7 +177,7 @@ static int rtcpu_trace_setup_memory(struct tegra_rtcpu_trace *tracer)
ret = of_parse_phandle_with_fixed_args(dev->of_node, NV(trace), ret = of_parse_phandle_with_fixed_args(dev->of_node, NV(trace),
3, 0, &reg_spec); 3, 0, &reg_spec);
if (unlikely(ret != 0)) { if (unlikely(ret != 0)) {
dev_err(dev, "Cannot find trace entry\n"); dev_err(dev, "%s: cannot find trace entry\n", __func__);
return -EINVAL; return -EINVAL;
} }
@@ -220,6 +225,7 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer)
{ {
u64 add_value = 0; u64 add_value = 0;
u32 sub_value = 0; u32 sub_value = 0;
u32 CAMRTC_TRACE_SNAPSHOT_OFFSET = 0;
if (unlikely(check_add_overflow(tracer->dma_handle, if (unlikely(check_add_overflow(tracer->dma_handle,
(u64)(offsetof(struct camrtc_trace_memory_header, (u64)(offsetof(struct camrtc_trace_memory_header,
@@ -232,6 +238,8 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer)
/* memory map */ /* memory map */
tracer->dma_handle_pointers = add_value; tracer->dma_handle_pointers = add_value;
// exception section setup
tracer->exceptions_base = tracer->trace_memory + tracer->exceptions_base = tracer->trace_memory +
CAMRTC_TRACE_EXCEPTION_OFFSET; CAMRTC_TRACE_EXCEPTION_OFFSET;
tracer->exception_entries = 7; tracer->exception_entries = 7;
@@ -244,18 +252,48 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer)
} }
tracer->dma_handle_exceptions = add_value; tracer->dma_handle_exceptions = add_value;
tracer->events = tracer->trace_memory + CAMRTC_TRACE_EVENT_OFFSET;
if (unlikely(check_sub_overflow(tracer->trace_memory_size, // event section setup
tracer->events = tracer->trace_memory + CAMRTC_TRACE_EVENT_OFFSET;
CAMRTC_TRACE_SNAPSHOT_OFFSET =
tracer->trace_memory_size - (CAMRTC_TRACE_SNAPSHOT_ENTRIES * CAMRTC_TRACE_EVENT_SIZE);
if (unlikely(check_sub_overflow(CAMRTC_TRACE_SNAPSHOT_OFFSET,
CAMRTC_TRACE_EVENT_OFFSET, &sub_value))) { CAMRTC_TRACE_EVENT_OFFSET, &sub_value))) {
dev_err(tracer->dev, dev_err(tracer->dev,
"%s:trace_memory_size failed due to an overflow\n", __func__); "%s:trace_memory_size failed due to an overflow\n", __func__);
return; return;
} }
tracer->event_entries = sub_value / CAMRTC_TRACE_EVENT_SIZE; tracer->event_entries = sub_value / CAMRTC_TRACE_EVENT_SIZE;
if (unlikely(check_add_overflow(tracer->dma_handle,
(u64)(CAMRTC_TRACE_EVENT_OFFSET), &add_value))) {
dev_err(tracer->dev,
"%s:dma_handle failed due to an overflow\n", __func__);
return;
}
tracer->dma_handle_events = add_value; tracer->dma_handle_events = add_value;
// snapshot section setup
dev_dbg(tracer->dev, "%s: setup snapshot section\n", __func__);
tracer->snapshot_events = tracer->trace_memory + CAMRTC_TRACE_SNAPSHOT_OFFSET;
tracer->snapshot_entries = CAMRTC_TRACE_SNAPSHOT_ENTRIES;
if (unlikely(check_add_overflow(tracer->dma_handle,
(u64)(CAMRTC_TRACE_SNAPSHOT_OFFSET), &add_value))) {
dev_err(tracer->dev,
"%s:dma_handle for snapshots failed due to an overflow\n", __func__);
return;
}
tracer->dma_handle_snapshots = add_value;
dev_dbg(tracer->dev, "%s: exception section: offset=%x, size=%u, entries=%u\n", __func__,
CAMRTC_TRACE_EXCEPTION_OFFSET, CAMRTC_TRACE_EXCEPTION_SIZE, tracer->exception_entries);
dev_dbg(tracer->dev, "%s: event section: offset=%x, size=%u, entries=%u\n", __func__,
CAMRTC_TRACE_EVENT_OFFSET, CAMRTC_TRACE_EVENT_SIZE, tracer->event_entries);
dev_dbg(tracer->dev, "%s: snapshot section: offset=%x, size=%u, entries=%u\n", __func__,
CAMRTC_TRACE_SNAPSHOT_OFFSET, CAMRTC_TRACE_EVENT_SIZE, tracer->snapshot_entries);
dev_dbg(tracer->dev, "%s: total trace memory size=%u\n", __func__, tracer->trace_memory_size);
{ {
struct camrtc_trace_memory_header header = { struct camrtc_trace_memory_header header = {
.tlv.tag = CAMRTC_TAG_NV_TRCON, .tlv.tag = CAMRTC_TAG_NV_TRCON,
@@ -267,6 +305,9 @@ static void rtcpu_trace_init_memory(struct tegra_rtcpu_trace *tracer)
.event_offset = CAMRTC_TRACE_EVENT_OFFSET, .event_offset = CAMRTC_TRACE_EVENT_OFFSET,
.event_size = CAMRTC_TRACE_EVENT_SIZE, .event_size = CAMRTC_TRACE_EVENT_SIZE,
.event_entries = tracer->event_entries, .event_entries = tracer->event_entries,
.snapshot_offset = CAMRTC_TRACE_SNAPSHOT_OFFSET,
.snapshot_size = CAMRTC_TRACE_EVENT_SIZE,
.snapshot_entries = tracer->snapshot_entries,
}; };
memcpy(tracer->trace_memory, &header, sizeof(header)); memcpy(tracer->trace_memory, &header, sizeof(header));
@@ -1872,6 +1913,90 @@ static inline void rtcpu_trace_events(struct tegra_rtcpu_trace *tracer)
tracer->copy_last_event = *last_event; tracer->copy_last_event = *last_event;
} }
/**
 * @brief Processes RTCPU snapshot trace events from shared memory.
 *
 * This function reads and processes snapshot trace events recorded by the RTCPU
 * into a dedicated circular buffer in shared memory. It is similar to
 * rtcpu_trace_events but operates on the snapshot buffer.
 *
 * - Acquires the tracer mutex lock.
 * - Reads the current snapshot write index (@ref snapshot_next_idx) from the
 *   shared memory header.
 * - Compares it with the last processed index (@ref tracer->snapshot_last_idx)
 *   to find new events.
 * - Returns early if the tracer is NULL or no new events are found.
 * - Validates the new index using @ref array_index_nospec.
 * - Invalidates CPU cache for the relevant memory range using
 *   @ref rtcpu_trace_invalidate_entries to ensure visibility of RTCPU writes.
 * - Iterates through new events from the old index up to (but not including)
 *   the new index, handling potential buffer wrap-around.
 * - For each event:
 *   - Validates the index using @ref array_index_nospec.
 *   - Gets a pointer to the event structure in the snapshot buffer.
 *   - Processes the event using @ref rtcpu_trace_event.
 *   - Increments the snapshot event counter (@ref tracer->n_snapshots).
 *   - Advances the index, handling wrap-around.
 * - Updates the last processed snapshot index (@ref tracer->snapshot_last_idx).
 * - Releases the tracer mutex lock.
 *
 * @param[in/out] tracer Pointer to the tegra_rtcpu_trace structure.
 *                       Valid Range: Non-NULL pointer.
 */
void rtcpu_trace_snapshot(struct tegra_rtcpu_trace *tracer)
{
	const struct camrtc_trace_memory_header *header = NULL;
	u32 old_next = 0U;
	u32 new_next = 0U;
	struct camrtc_event_struct *event = NULL;

	if (tracer == NULL)
		return;

	mutex_lock(&tracer->lock);

	header = tracer->trace_memory;
	old_next = tracer->snapshot_last_idx;
	new_next = header->snapshot_next_idx;

	/* Reject an out-of-range write index coming from the shared header */
	if (new_next >= tracer->snapshot_entries) {
		dev_warn_ratelimited(tracer->dev,
			"trace entry %u outside range 0..%u\n",
			new_next, tracer->snapshot_entries - 1);
		mutex_unlock(&tracer->lock);
		return;
	}
	/* Clamp the index under speculation before it is used for access */
	new_next = array_index_nospec(new_next, tracer->snapshot_entries);

	/* No new snapshot entries since the last dump */
	if (old_next == new_next) {
		mutex_unlock(&tracer->lock);
		return;
	}

	/* Invalidate CPU cache over the new range so RTCPU writes are visible */
	rtcpu_trace_invalidate_entries(tracer,
		tracer->dma_handle_snapshots,
		old_next, new_next,
		CAMRTC_TRACE_EVENT_SIZE,
		tracer->snapshot_entries);

	/* dev_err so the dump markers are visible in the log during a panic */
	dev_err(tracer->dev, "%s: dump snapshot start at %u, end at %u\n",
		__func__, old_next, new_next);

	/* Walk [old_next, new_next) in the ring, wrapping at snapshot_entries */
	while (old_next != new_next) {
		old_next = array_index_nospec(old_next, tracer->snapshot_entries);
		event = &tracer->snapshot_events[old_next];
		rtcpu_trace_event(tracer, event);
		tracer->n_snapshots = wrap_add_u32(tracer->n_snapshots, 1U);

		old_next = wrap_add_u32(old_next, 1U);
		if (old_next == tracer->snapshot_entries)
			old_next = 0;
	}
	tracer->snapshot_last_idx = new_next;

	mutex_unlock(&tracer->lock);
}
EXPORT_SYMBOL(rtcpu_trace_snapshot);
/** /**
* @brief Flushes the RTCPU trace buffer * @brief Flushes the RTCPU trace buffer
* *
@@ -2441,8 +2566,8 @@ static int rtcpu_trace_debugfs_stats_read(
{ {
struct tegra_rtcpu_trace *tracer = file->private; struct tegra_rtcpu_trace *tracer = file->private;
seq_printf(file, "Exceptions: %u\nEvents: %llu\n", seq_printf(file, "Exceptions: %u\nEvents: %llu\nSnapshots: %u\n",
tracer->n_exceptions, tracer->n_events); tracer->n_exceptions, tracer->n_events, tracer->n_snapshots);
return 0; return 0;
} }
@@ -2673,8 +2798,10 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev,
int ret; int ret;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (unlikely(tracer == NULL)) if (unlikely(tracer == NULL)) {
dev_err(dev, "%s: failed to allocate tracer\n", __func__);
return NULL; return NULL;
}
tracer->dev = dev; tracer->dev = dev;
mutex_init(&tracer->lock); mutex_init(&tracer->lock);
@@ -2682,7 +2809,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev,
/* Get the trace memory */ /* Get the trace memory */
ret = rtcpu_trace_setup_memory(tracer); ret = rtcpu_trace_setup_memory(tracer);
if (ret) { if (ret) {
dev_err(dev, "Trace memory setup failed: %d\n", ret); dev_err(dev, "%s: failed to setup trace memory, err=%d\n", __func__, ret);
kfree(tracer); kfree(tracer);
return NULL; return NULL;
} }
@@ -2729,7 +2856,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev,
/* Worker */ /* Worker */
param = WORK_INTERVAL_DEFAULT; param = WORK_INTERVAL_DEFAULT;
if (of_property_read_u32(tracer->of_node, NV(interval-ms), &param)) { if (of_property_read_u32(tracer->of_node, NV(interval-ms), &param)) {
dev_err(dev, "interval-ms property not present\n"); dev_err(dev, "%s: interval-ms property not present\n", __func__);
kfree(tracer); kfree(tracer);
return NULL; return NULL;
} }
@@ -2740,7 +2867,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev,
tracer->log_prefix = "[RTCPU]"; tracer->log_prefix = "[RTCPU]";
if (of_property_read_string(tracer->of_node, NV(log-prefix), if (of_property_read_string(tracer->of_node, NV(log-prefix),
&tracer->log_prefix)) { &tracer->log_prefix)) {
dev_err(dev, "RTCPU property not present\n"); dev_err(dev, "%s: RTCPU property not present\n", __func__);
kfree(tracer); kfree(tracer);
return NULL; return NULL;
} }
@@ -2756,7 +2883,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(struct device *dev,
ret = raw_trace_node_drv_register(tracer); ret = raw_trace_node_drv_register(tracer);
if (ret) { if (ret) {
dev_err(dev, "Failed to register device node\n"); dev_err(dev, "%s: failed to register device node, err=%d\n", __func__, ret);
kfree(tracer); kfree(tracer);
return NULL; return NULL;
} }

View File

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */ /* SPDX-License-Identifier: GPL-2.0-only */
/* /*
* Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/ */
#ifndef INCLUDE_RTCPU_HSP_COMBO_H #ifndef INCLUDE_RTCPU_HSP_COMBO_H
@@ -32,5 +32,8 @@ int camrtc_hsp_get_fw_hash(struct camrtc_hsp *camhsp,
int camrtc_hsp_set_operating_point(struct camrtc_hsp *camhsp, int camrtc_hsp_set_operating_point(struct camrtc_hsp *camhsp,
uint32_t operating_point); uint32_t operating_point);
int camrtc_hsp_set_panic_callback(struct camrtc_hsp *camhsp,
void (*panic_callback)(struct device *dev));
#endif /* INCLUDE_RTCPU_HSP_COMBO_H */ #endif /* INCLUDE_RTCPU_HSP_COMBO_H */

View File

@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */ /* SPDX-License-Identifier: GPL-2.0-only */
/* /*
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/ */
#ifndef _LINUX_TEGRA_RTCPU_TRACE_H_ #ifndef _LINUX_TEGRA_RTCPU_TRACE_H_
@@ -17,5 +17,7 @@ struct tegra_rtcpu_trace *tegra_rtcpu_trace_create(
int tegra_rtcpu_trace_boot_sync(struct tegra_rtcpu_trace *tracer); int tegra_rtcpu_trace_boot_sync(struct tegra_rtcpu_trace *tracer);
void tegra_rtcpu_trace_flush(struct tegra_rtcpu_trace *tracer); void tegra_rtcpu_trace_flush(struct tegra_rtcpu_trace *tracer);
void tegra_rtcpu_trace_destroy(struct tegra_rtcpu_trace *tracer); void tegra_rtcpu_trace_destroy(struct tegra_rtcpu_trace *tracer);
void rtcpu_trace_snapshot(struct tegra_rtcpu_trace *tracer);
void rtcpu_trace_panic_callback(struct device *dev);
#endif #endif