diff --git a/drivers/video/tegra/host/pva/Makefile b/drivers/video/tegra/host/pva/Makefile index b7afe806..a3f990a2 100644 --- a/drivers/video/tegra/host/pva/Makefile +++ b/drivers/video/tegra/host/pva/Makefile @@ -63,6 +63,7 @@ pva_objs += \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device_api.o \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_device_memory.o \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_driver.o \ + $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_event_trace.o \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_ioctl.o \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_isr.o \ $(PVA_SYS_DIR)/src/kmd/linux/pva_kmd_linux_misc.o \ diff --git a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h index d5c79c78..f59b3b21 100644 --- a/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h +++ b/drivers/video/tegra/host/pva/src/fw/include/pva_fw.h @@ -65,6 +65,7 @@ struct pva_fw_cmdbuf_submit_info { uint32_t execution_timeout_ms; struct pva_fw_memory_addr output_statuses[PVA_MAX_NUM_OUTPUT_STATUS]; struct pva_fw_postfence postfences[PVA_MAX_NUM_POSTFENCES]; + uint64_t submit_id; }; /* This is the header of the circular buffer */ @@ -282,7 +283,7 @@ enum pva_fw_timestamp_t { * message. KMD can further parse these messages to extract the exact size of the * message. */ -#define PVA_KMD_FW_BUF_ELEMENT_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) +#define PVA_KMD_FW_BUF_ELEMENT_SIZE sizeof(struct pva_kmd_fw_msg_vpu_trace) // TODO: remove element size and buffer size fields from this struct. // This struct is shared between KMD and FW. FW should not be able to change @@ -297,6 +298,47 @@ struct pva_fw_shared_buffer_header { uint32_t tail; }; +struct pva_kmd_fw_buffer_msg_header { +#define PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT 0 +#define PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE 1 +#define PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG 2 + uint32_t type : 8; + // Size of payload in bytes. Includes the size of the header. + uint32_t size : 24; +}; + +// Tracing information for NSIGHT +struct pva_kmd_fw_msg_vpu_trace { + // VPU ID on which the job was executed + uint8_t engine_id; + // CCQ ID through which the job was submitted + uint8_t ccq_id; + // Queue ID through which the job was submitted + // This is not relative to a context. It ranges from 0 to 55 + uint8_t queue_id; + // Number of prefences in the cmdbuf + uint8_t num_prefences; + // Program ID of the VPU program executed. + // Not supported today as CUPVA does not fully support this yet. + // The intent is for user applications to be able to assign + // an identifier to a VPU kernel. This ID will then be forwarded + // by the FW to the KMD for tracing. + uint64_t prog_id; + // Start time of the VPU execution + uint64_t vpu_start_time; + // End time of the VPU execution + uint64_t vpu_end_time; + // Submit ID of the cmdbuf + // User applications can assign distinct identifiers to command buffers. + // FW will forward this identifier to the KMD for tracing.
+ uint64_t submit_id; +}; + +// Resource unregister message +struct pva_kmd_fw_msg_res_unreg { + uint32_t resource_id; +}; + struct pva_kmd_fw_tegrastats { uint64_t window_start_time; uint64_t window_end_time; diff --git a/drivers/video/tegra/host/pva/src/include/pva_api.h b/drivers/video/tegra/host/pva/src/include/pva_api.h index 303a3e4a..7d7d4878 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api.h @@ -12,6 +12,7 @@ extern "C" { #include "pva_api_dma.h" #include "pva_api_vpu.h" #include "pva_api_cmdbuf.h" +#include "pva_api_ops.h" /* Core APIs */ @@ -131,135 +132,6 @@ enum pva_error pva_executable_get_symbols(struct pva_context *ctx, struct pva_symbol_info *out_info, uint32_t max_num_symbols); -/** - * @brief Submit a list of asynchronous registration operations to KMD. - * - * The operations can be: - * - Memory registration - * - Executable registration - * - DMA config registration - * - * The response buffer will contain the resource IDs of the registered - * resources. Any command buffers that use these resources should wait on the - * returned post fence. - * - * @param[in] ctx Pointer to the context. - * @param[in] fence Pointer to the post fence to wait on. If NULL, it means the - * caller is not interested in waiting. This usually only applies to unregister - * operations. - * @param[in] Input buffer containing the list of operations. - * @param[out] Output buffer to store the response. - * - * @note Input and output buffer may be the same buffer. - */ -enum pva_error pva_ops_submit_async(struct pva_context *ctx, - struct pva_fence *fence, - struct pva_ops_buffer const *input_buffer, - struct pva_ops_buffer *output_buffer); - -/** - * @brief Perform a list of registration operations synchronously. - * - * The operations can be: - * - Memory registration - * - Executable registration - * - DMA config registration - * - * The response buffer will contain the resource IDs of the registered - * resources. - * - * @param[in] ctx Pointer to the context. - * @param[in] Input buffer containing the list of operations. - * @param[out] Output buffer to store the response. - * - * @note Input and output buffer may be the same buffer. - * - */ -enum pva_error pva_ops_submit(struct pva_context *ctx, - struct pva_ops_buffer const *input_buffer, - struct pva_ops_buffer *output_buffer); - -/** Size of the ops buffer header. When user allocates memory for ops buffer, - * this size needs to be added. */ -#define PVA_OPS_BUFFER_HEADER_SIZE 64 -/** - * @brief Initialize pva_ops_buffer to keep track of the state of - * operations buffer during preparation. - * - * @param[out] buf_handle Pointer to the pva_ops_buffer object to initialize. - * @param[in] buf Pointer to the buffer that will store the operations. - * @param[in] size Size of the buffer. - */ -enum pva_error pva_ops_buffer_init(struct pva_ops_buffer *buf_handle, void *buf, - uint32_t size); - -#define PVA_OPS_MEMORY_REG_SIZE 64 -/** - * @brief Append a memory registration operation to the operations buffer. - * - * @param[in] ctx Pointer to the context. - * @param[in] mem Pointer to the memory to register. - * @param[in] segment Memory segment to register. - * @param[in] access_flags Access flags for the memory. - * @param[out] op_buf Pointer to the operations buffer. 
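/*
 * A minimal consumer-side sketch for the KMD/FW shared-buffer message
 * format introduced in pva_fw.h above, assuming the caller already holds a
 * contiguous snapshot of the valid bytes; the function itself is
 * hypothetical, and only the header and message structs come from this
 * patch.
 */
static void sketch_drain_fw_messages(const uint8_t *buf, uint32_t len)
{
	uint32_t off = 0U;

	while ((len - off) >= sizeof(struct pva_kmd_fw_buffer_msg_header)) {
		struct pva_kmd_fw_buffer_msg_header hdr;

		memcpy(&hdr, &buf[off], sizeof(hdr));
		/* hdr.size counts the payload plus the header itself */
		if ((hdr.size < sizeof(hdr)) || (hdr.size > (len - off))) {
			break;
		}
		if ((hdr.type == PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE) &&
		    (hdr.size >= (sizeof(hdr) +
				  sizeof(struct pva_kmd_fw_msg_vpu_trace)))) {
			struct pva_kmd_fw_msg_vpu_trace trace;

			memcpy(&trace, &buf[off + sizeof(hdr)], sizeof(trace));
			/* e.g. emit trace.vpu_start_time / trace.vpu_end_time */
		}
		off += hdr.size;
	}
}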
- */ -enum pva_error pva_ops_append_memory_register(struct pva_context *ctx, - struct pva_memory *mem, - enum pva_memory_segment segment, - uint32_t access_flags, - struct pva_ops_buffer *op_buf); -#define PVA_OPS_EXEC_REG_HEADER_SIZE 16 -/** - * @brief Append an executable registration operation to the operations. - * - * @param[in] ctx Pointer to the context. - * @param[in] executable Pointer to the executable binary content. - * @param[in] executable_size Size of the executable. - * @param[out] op_buf Pointer to the operations buffer. - */ -enum pva_error pva_ops_append_executable_register( - struct pva_context *ctx, void const *executable, - uint32_t executable_size, struct pva_ops_buffer *op_buf); - -#define PVA_OPS_DMA_CONFIG_REG_SIZE (24 * 1024) -/** - * @brief Append a DMA config registration operation to the operations. - * @param[in] ctx Pointer to the context. - * @param[in] dma_config Pointer to the DMA config. - * @param[out] op_buf Pointer to the operations buffer. - */ -enum pva_error -pva_ops_append_dma_config_register(struct pva_context *ctx, - struct pva_dma_config const *dma_config, - struct pva_ops_buffer *op_buf); - -#define PVA_OPS_UNREG_SIZE 16 -enum pva_error pva_ops_append_unregister(struct pva_context *ctx, - uint32_t resource_id, - struct pva_ops_buffer *op_buf); - -/** - * @brief Parse the response buffer to get the resource ID of the registered - * memory or DMA configuration. - * - * @param[in] resp_buf Pointer to the response buffer. - * @param[out] resource_id output resource ID. - */ -enum pva_error pva_ops_parse_register_resp(struct pva_ops_buffer *resp_buf, - uint32_t *resource_id); - -/** - * @brief Parse the response buffer to get the resource ID of the registered - * executable. - * - * @param[in] resp_buf Pointer to the response buffer. - * @param[out] num_symbols Number of symbols in the executable. - * @param[out] resource_id output resource ID. - */ -enum pva_error pva_ops_parse_exec_register_resp(struct pva_ops_buffer *op_buf, - uint32_t *num_symbols, - uint32_t *resource_id); - #define PVA_DATA_CHANNEL_HEADER_SIZE 32 /** * @brief Initialize VPU print buffer @@ -308,6 +180,26 @@ enum pva_error pva_memory_duplicate(struct pva_memory *src, void pva_memory_get_attrs(struct pva_memory const *mem, struct pva_memory_attrs *out_attrs); +/** + * @brief Create an import ID for memory registration. + * + * The ID must be destroyed after registration. + * + * @param[in] ctx Pointer to the context. + * @param[in] mem Pointer to the memory. + * @param[out] out_import_id Pointer to the import ID. + */ +enum pva_error pva_memory_import_id_create(struct pva_context *ctx, + struct pva_memory *mem, + uint64_t *out_import_id); + +/** + * @brief Destroy an import ID. + * + * @param[in] import_id Import ID to destroy. + */ +enum pva_error pva_memory_import_id_destroy(uint64_t import_id); + /** \brief Specifies the PVA system software major version. 
*/ #define PVA_SYSSW_MAJOR_VERSION (2U) diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h index 86575778..340714df 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_cmdbuf.h @@ -635,7 +635,13 @@ struct pva_cmd_set_debug_log_level { uint32_t log_level; }; -#define PVA_CMD_PRIV_OPCODE_COUNT 13U +struct pva_cmd_set_profiling_level { +#define PVA_CMD_OPCODE_SET_PROFILING_LEVEL (13U | PVA_CMD_PRIV_OPCODE_FLAG) + struct pva_cmd_header header; + uint32_t level; +}; + +#define PVA_CMD_PRIV_OPCODE_COUNT 14U #define PVA_MAX_CMDBUF_CHUNK_LEN 1024 #define PVA_MAX_CMDBUF_CHUNK_SIZE (sizeof(uint32_t) * PVA_MAX_CMDBUF_CHUNK_LEN) diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_dma.h b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h index 9f6b86ee..18b4864a 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_dma.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_dma.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + #ifndef PVA_API_DMA_H #define PVA_API_DMA_H #include "pva_api_types.h" @@ -282,15 +283,6 @@ struct pva_dma_config_header { uint16_t base_hwseq_word; uint16_t num_hwseq_words; uint32_t vpu_exec_resource_id; - - /* For serialized version of pva_dma_config, the following fields follow - * immediately after this header. The starting addresses of these fields - * must be aligned to 8 bytes */ - - /* An array of hwseq words */ - /* An array of pva_dma_channel */ - /* An array of pva_dma_descriptor */ - /* An array of pva_dma_slot_buffer */ }; enum pva_dma_static_binding_type { @@ -324,12 +316,4 @@ struct pva_dma_static_binding { }; }; -struct pva_dma_config { - struct pva_dma_config_header header; - uint32_t *hwseq_words; - struct pva_dma_channel *channels; - struct pva_dma_descriptor *descriptors; - struct pva_dma_static_binding *static_bindings; -}; - #endif // PVA_API_DMA_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_ops.h b/drivers/video/tegra/host/pva/src/include/pva_api_ops.h new file mode 100644 index 00000000..217e0f8c --- /dev/null +++ b/drivers/video/tegra/host/pva/src/include/pva_api_ops.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef PVA_API_OPS_H +#define PVA_API_OPS_H +#include "pva_api_types.h" +#include "pva_api_dma.h" + +/** + * @brief Structure describing memory used by PVA KMD operations. + */ +struct pva_ops_memory { + uint32_t handle; /**< Memory handle */ + uint32_t size; /**< Size of memory */ + void *va; /**< Virtual address */ +}; + +/** + * @brief Structure describing the state of the operation buffer being appended + * to/parsed from. + * + * Valid data is between [start_offset, end_offset). The reader/consumer + * advances the start_offset, while the writer/producer advances the end_offset. + * Consequently, when used as an input buffer, PVA KMD reads from start_offset + * to end_offset; when used as an output buffer, PVA KMD appends to end_offset + * until memory->size is reached. 
+ */ +struct pva_ops_buffer { + struct pva_ops_memory *memory; /**< Pointer to buffer memory */ + uint32_t start_offset; /**< Start offset in buffer memory */ + uint32_t end_offset; /**< End offset (exclusive) in buffer memory */ +}; + +/** + * @brief Alignment requirement for PVA operations. + * + * All PVA operation starting offsets must be 8-byte aligned. The fixed-size + * operation structs are already explicitly aligned. The starting offsets of + * arrays in variable-sized operations (e.g. DMA configuration) must be manually + * aligned to 8 bytes. + */ +#define PVA_OPS_ALIGNMENT 8U + +/** + * @brief Header structure for PVA operations. + */ +struct pva_ops_header { + uint32_t opcode; /**< Operation code identifying the operation type */ + /** Size of the operation in bytes. This size must be a multiple of 8 bytes. */ + uint32_t size; +}; + +/** + * @brief Structure for executable registration operation. + */ +struct pva_ops_executable_register { +#define PVA_OPS_OPCODE_EXECUTABLE_REGISTER 1U + struct pva_ops_header header; /**< Operation header */ + uint32_t exec_size; /**< Size of executable data */ + uint32_t pad; /**< Padding for 8-byte alignment */ + // followed by executable data +}; + +/** + * @brief Parse the response buffer to get the resource ID of the registered + * executable. + * + * @param[in] op_buf Pointer to the response buffer. + * @param[out] num_symbols Number of symbols in the executable. + * @param[out] resource_id Output resource ID. + */ +enum pva_error pva_ops_parse_exec_register_resp(struct pva_ops_buffer *op_buf, + uint32_t *num_symbols, + uint32_t *resource_id); + +/** + * @brief Structure for DMA configuration registration operation. + */ +struct pva_ops_dma_config_register { +#define PVA_OPS_OPCODE_DMA_CONFIG_REGISTER 2U + struct pva_ops_header header; /**< Operation header */ + struct pva_dma_config_header + dma_config_header; /**< DMA configuration header */ + uint32_t channels_offset; + uint32_t descriptors_offset; + uint32_t hwseq_words_offset; + uint32_t static_bindings_offset; + // followed by channels, descriptors, hwseq_words, static_bindings +}; + +/** + * @brief Append a memory registration operation to the operations buffer. + * + * @param[in] ctx Pointer to the context. + * @param[in] mem Pointer to the memory to register. + * @param[in] import_id Import ID created by pva_memory_import_id_create. + * @param[in] segment Memory segment to register. + * @param[in] access_flags Access flags for the memory. + * @param[out] op_buf Pointer to the operations buffer. + */ +enum pva_error pva_ops_append_memory_register(struct pva_context *ctx, + struct pva_memory *mem, + uint64_t import_id, + enum pva_memory_segment segment, + uint32_t access_flags, + struct pva_ops_buffer *op_buf); + +/** + * @brief Parse the response buffer to get the resource ID of the registered + * memory or DMA configuration. + * + * @param[in] resp_buf Pointer to the response buffer. + * @param[out] resource_id Output resource ID. + */ +enum pva_error pva_ops_parse_register_resp(struct pva_ops_buffer *resp_buf, + uint32_t *resource_id); + +/** + * @brief Append a resource unregistration operation to the operations buffer. + * + * @param[in] ctx Pointer to the context. + * @param[in] resource_id Resource ID to unregister. + * @param[out] op_buf Pointer to the operations buffer.
+ */ +enum pva_error pva_ops_append_unregister(struct pva_context *ctx, + uint32_t resource_id, + struct pva_ops_buffer *op_buf); + +/** + * @brief Parse the response buffer to get the result of the unregister operation. + * + * @param[in] resp_buf Pointer to the response buffer. + */ +enum pva_error pva_ops_parse_unregister_resp(struct pva_ops_buffer *resp_buf); + +/** + * @brief Allocate memory for operations buffer. + * + * This memory is directly accessible by the PVA KMD. + * + * @param[in] ctx Pointer to PVA context. + * @param[in] size Size of buffer to allocate. + * @param[out] ops_buf Pointer to operations buffer memory structure. + * + * @return PVA_SUCCESS on success, appropriate error code otherwise. + */ +enum pva_error pva_ops_memory_alloc(struct pva_context *ctx, uint32_t size, + struct pva_ops_memory *ops_buf); + +/** + * @brief Free operations buffer memory. + * + * @param[in] ctx Pointer to PVA context. + * @param[in] ops_buf Pointer to operations buffer memory to free. + */ +void pva_ops_memory_free(struct pva_context *ctx, + struct pva_ops_memory *ops_buf); + +/** + * @brief Submit operations buffer synchronously to PVA KMD for processing. + * + * This function submits a buffer of operations to the KMD and waits for FW + * acknowledgement synchronously. The KMD will process each operation in the + * input buffer sequentially and write responses to the output buffer in the + * same order. + * + * @param[in] ctx Pointer to PVA context. + * @param[in] input_buffer Input operations buffer containing operations to + * process. + * @param[out] output_buffer Output operations buffer where responses will be + * written. Must have sufficient space for all + * responses. It is guaranteed that each response will + * not be longer than the corresponding operation. + * + * @retval PVA_SUCCESS All operations were processed by KMD and responses + * written, though individual operations may have failed. + * Parse output buffer to check per-operation status. + * @return Other error codes if KMD was not able to process all operations or + * was not able to write all responses. + */ +enum pva_error pva_ops_submit(struct pva_context *ctx, + struct pva_ops_buffer *input_buffer, + struct pva_ops_buffer *output_buffer); +/** + * @brief Submit operations buffer asynchronously to PVA KMD for processing. + * + * Identical to pva_ops_submit, but does not wait for FW acknowledgement. + * + * This function submits a buffer of operations to the KMD. The KMD will NOT + * wait for FW acknowledgement. The KMD will process each operation in the input + * buffer sequentially and write responses to the output buffer in the same + * order. For any command buffers that wish to use the resource IDs returned by + * this function, they must attach the fence as a pre-fence. + * + * @param[in] ctx Pointer to PVA context. + * @param[in] input_buffer Input operations buffer containing operations to + * process. + * @param[out] output_buffer Output operations buffer where responses will be + * written. Must have sufficient space for all + * responses. It is guaranteed that each response + * will not be longer than the corresponding + * operation. + * @param[in] fence Optional fence to signal when operations complete. If NULL, + * no fence will be signaled, but KMD still will not wait for + * FW acknowledgement. + * + * @retval PVA_SUCCESS All operations were processed by KMD and responses + * written, though individual operations may have failed. + * Parse output buffer to check per-operation status.
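/*
 * An end-to-end sketch of the ops flow declared in this header, assuming a
 * zero-initialized pva_ops_buffer is a valid empty buffer, that the same
 * buffer may serve as input and output (as the earlier in-tree
 * documentation of pva_ops_submit allowed), and that 4 KiB is enough for
 * one registration op. PVA_MEMORY_SEGMENT_DRAM stands in for a real
 * enum pva_memory_segment value, and error handling is abbreviated.
 */
static enum pva_error sketch_register_memory(struct pva_context *ctx,
					     struct pva_memory *mem,
					     uint32_t *out_resource_id)
{
	struct pva_ops_memory ops_mem = { 0 };
	struct pva_ops_buffer buf = { 0 };
	uint64_t import_id = 0U;
	enum pva_error err;

	err = pva_ops_memory_alloc(ctx, 4096U, &ops_mem);
	if (err != PVA_SUCCESS) {
		return err;
	}
	buf.memory = &ops_mem; /* valid data lives in [start_offset, end_offset) */

	/* An import ID is created per registration and destroyed afterwards */
	err = pva_memory_import_id_create(ctx, mem, &import_id);
	if (err == PVA_SUCCESS) {
		err = pva_ops_append_memory_register(ctx, mem, import_id,
						     PVA_MEMORY_SEGMENT_DRAM /* hypothetical value */,
						     0U /* access flags */, &buf);
		if (err == PVA_SUCCESS) {
			err = pva_ops_submit(ctx, &buf, &buf);
		}
		if (err == PVA_SUCCESS) {
			err = pva_ops_parse_register_resp(&buf, out_resource_id);
		}
		(void)pva_memory_import_id_destroy(import_id);
	}
	pva_ops_memory_free(ctx, &ops_mem);
	return err;
}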
+ * @return Other error codes if KMD was not able to process all operations or + * was not able to write all responses. + */ +enum pva_error pva_ops_submit_async(struct pva_context *ctx, + struct pva_ops_buffer *input_buffer, + struct pva_ops_buffer *output_buffer, + struct pva_fence *fence); + +#endif // PVA_API_OPS_H diff --git a/drivers/video/tegra/host/pva/src/include/pva_api_types.h b/drivers/video/tegra/host/pva/src/include/pva_api_types.h index d6cfefe1..7df62f89 100644 --- a/drivers/video/tegra/host/pva/src/include/pva_api_types.h +++ b/drivers/video/tegra/host/pva/src/include/pva_api_types.h @@ -117,6 +117,7 @@ ACT(PVA_ERR_PPE_ILLEGAL_INSTR) \ ACT(PVA_ERR_MATH_OP) \ ACT(PVA_ERR_HWSEQ_INVALID) \ + ACT(PVA_ERR_FW_ABORTED) \ ACT(PVA_ERR_CODE_COUNT) enum pva_error { @@ -324,6 +325,8 @@ struct pva_cmdbuf_submit_info { uint16_t first_chunk_size; /** Resource ID of the first chunk */ uint32_t first_chunk_resource_id; + /** User provided submission identifier */ + uint64_t submit_id; /** Offset of the first chunk within the resource */ uint64_t first_chunk_offset; #define PVA_EXEC_TIMEOUT_REUSE 0xFFFFFFFFU @@ -337,12 +340,6 @@ struct pva_cmdbuf_submit_info { struct pva_dram_addr timestamps[PVA_MAX_NUM_TIMESTAMPS]; }; -struct pva_ops_buffer { - void *base; /**< Buffer holding a list of async operations */ - uint32_t offset; /**< First unused byte in the buffer */ - uint32_t size; /**< Size of the buffer */ -}; - struct pva_cmdbuf_status { /** Timestamp reflecting when the status was updated. This is in resolution of ns */ uint64_t timestamp; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h index d4072383..2e8d3d59 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_cmdbuf.h @@ -278,4 +278,13 @@ static inline void pva_kmd_set_cmd_deinit_shared_dram_buffer( cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); cmd->interface = interface; } + +static inline void +pva_kmd_set_cmd_set_profiling_level(struct pva_cmd_set_profiling_level *cmd, + uint32_t level) +{ + cmd->header.opcode = PVA_CMD_OPCODE_SET_PROFILING_LEVEL; + cmd->header.len = sizeof(*cmd) / sizeof(uint32_t); + cmd->level = level; +} #endif // PVA_KMD_CMDBUF_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_co.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_co.h new file mode 100644 index 00000000..79b2cdfc --- /dev/null +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_co.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef PVA_KMD_CO_H +#define PVA_KMD_CO_H + +struct pva_co_info { + uint64_t base_va; + uint64_t base_pa; + uint64_t size; +}; + +#endif //PVA_KMD_CO_H \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c index 3134b2b9..3d2ac145 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_context.c @@ -277,10 +277,11 @@ enum pva_error pva_kmd_context_init(struct pva_kmd_context *ctx, goto deinit_submitter; } - err = pva_kmd_shared_buffer_init( - ctx->pva, ctx->ccq_id, PVA_KMD_FW_BUF_ELEMENT_SIZE, - res_table_capacity, pva_kmd_handle_msg_resource_unreg, - pva_kmd_resource_table_lock, pva_kmd_resource_table_unlock); + err = pva_kmd_shared_buffer_init(ctx->pva, ctx->ccq_id, + PVA_KMD_FW_BUF_ELEMENT_SIZE, + res_table_capacity, + pva_kmd_resource_table_lock, + pva_kmd_resource_table_unlock); if (err != PVA_SUCCESS) { goto deinit_submitter; } @@ -337,18 +338,17 @@ void pva_kmd_context_deinit(struct pva_kmd_context *ctx) static void pva_kmd_destroy_all_queues(struct pva_kmd_context *ctx) { enum pva_error err; - struct pva_kmd_queue_destroy_in_args args; - struct pva_kmd_queue *queue; for (uint32_t queue_id = 0u; queue_id < ctx->max_n_queues; queue_id++) { + struct pva_kmd_queue *queue; + pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock); queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, queue_id); if (queue != NULL) { pva_kmd_mutex_unlock( &ctx->queue_allocator.allocator_lock); - args.queue_id = queue_id; - err = pva_kmd_queue_destroy(ctx, &args); + err = pva_kmd_queue_destroy(ctx, queue_id); ASSERT(err == PVA_SUCCESS); } else { pva_kmd_mutex_unlock( diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c index 08404261..3fdbd3c9 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.c @@ -10,17 +10,147 @@ #include "pva_kmd_vpu_app_auth.h" #include "pva_kmd_shared_buffer.h" +static uint64_t read_from_buffer_to_user(void *to, uint64_t count, + uint64_t offset, const void *from, + uint64_t available) +{ + if (offset >= available || !count) { + return 0; + } + if (count > available - offset) { + count = available - offset; + } + if (pva_kmd_copy_data_to_user(to, (uint8_t *)from + offset, count)) { + pva_kmd_log_err("failed to copy read buffer to user"); + return 0; + } + return count; +} + +static enum pva_error +pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, + uint32_t level) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_set_profiling_level *cmd; + uint32_t fence_val; + enum pva_error err; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + + cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); + ASSERT(cmd != NULL); + pva_kmd_set_cmd_set_profiling_level(cmd, level); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when setting profiling level"); + goto err_out; + } + + return PVA_SUCCESS; + +err_out: + 
return err; +} + +static int64_t profiling_level_read(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, + uint64_t size) +{ + char kernel_buffer[256]; + int64_t formatted_len; + + /* Format the string only once */ + formatted_len = snprintf(kernel_buffer, sizeof(kernel_buffer), "%u\n", + dev->debugfs_context.profiling_level); + + if (formatted_len <= 0) { + return 0; + } + + formatted_len++; // Account for null terminator + + return read_from_buffer_to_user(out_buffer, size, offset, kernel_buffer, + formatted_len); +} + +static int64_t profiling_level_write(struct pva_kmd_device *dev, + void *file_data, const uint8_t *data, + uint64_t offset, uint64_t size) +{ + char kernel_buffer[256]; + uint32_t value; + + if (size >= sizeof(kernel_buffer)) { + return 0; + } + + if (pva_kmd_copy_data_from_user(kernel_buffer, data, size)) { + pva_kmd_log_err("failed to copy write buffer from user"); + return 0; + } + + kernel_buffer[size] = '\0'; + if (sscanf(kernel_buffer, "%u", &value) != 1) { + return 0; + } + + dev->debugfs_context.profiling_level = value; + + if (pva_kmd_device_maybe_on(dev)) { + enum pva_error err; + err = pva_kmd_device_busy(dev); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "pva_kmd_device_busy failed when submitting set profiling level cmd"); + return 0; + } + err = pva_kmd_notify_fw_set_profiling_level(dev, value); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Failed to notify FW about profiling level change"); + return 0; + } + pva_kmd_device_idle(dev); + } + return size; +} + void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva) { static const char *vpu_ocd_names[NUM_VPU_BLOCKS] = { "ocd_vpu0_v3", "ocd_vpu1_v3" }; - pva_kmd_debugfs_create_bool(pva, "stats_enable", + struct pva_kmd_file_ops *profiling_fops; + + pva_kmd_debugfs_create_bool(pva, "stats_enabled", &pva->debugfs_context.stats_enable); pva_kmd_debugfs_create_bool(pva, "vpu_debug", &pva->debugfs_context.vpu_debug); - pva_kmd_debugfs_create_u32(pva, "profile_level", - &pva->debugfs_context.profile_level); - pva->debugfs_context.vpu_fops.read = &update_vpu_stats; + + // Create profiling_level file operations + profiling_fops = &pva->debugfs_context.profiling_level_fops; + profiling_fops->read = profiling_level_read; + profiling_fops->write = profiling_level_write; + profiling_fops->open = NULL; + profiling_fops->release = NULL; + profiling_fops->pdev = pva; + pva_kmd_debugfs_create_file(pva, "profiling_level", profiling_fops); + + pva->debugfs_context.vpu_fops.read = &get_vpu_stats; + pva->debugfs_context.vpu_fops.write = NULL; pva->debugfs_context.vpu_fops.pdev = pva; pva_kmd_debugfs_create_file(pva, "vpu_stats", &pva->debugfs_context.vpu_fops); @@ -41,14 +171,23 @@ void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *pva) &pva->debugfs_context.vpu_ocd_fops[i]); } - pva->debugfs_context.allowlist_fops.read = &get_vpu_allowlist_enabled; - pva->debugfs_context.allowlist_fops.write = &update_vpu_allowlist; - pva->debugfs_context.allowlist_fops.pdev = pva; + pva->debugfs_context.allowlist_ena_fops.read = + &get_vpu_allowlist_enabled; + pva->debugfs_context.allowlist_ena_fops.write = &update_vpu_allowlist; + pva->debugfs_context.allowlist_ena_fops.pdev = pva; pva_kmd_debugfs_create_file(pva, "vpu_app_authentication", - &pva->debugfs_context.allowlist_fops); + &pva->debugfs_context.allowlist_ena_fops); + + pva->debugfs_context.allowlist_path_fops.read = &get_vpu_allowlist_path; + pva->debugfs_context.allowlist_path_fops.write = + &update_vpu_allowlist_path; + 
pva->debugfs_context.allowlist_path_fops.pdev = pva; + pva_kmd_debugfs_create_file(pva, "allowlist_path", + &pva->debugfs_context.allowlist_path_fops); pva->debugfs_context.fw_debug_log_level_fops.write = &update_fw_debug_log_level; + pva->debugfs_context.fw_debug_log_level_fops.read = NULL; pva->debugfs_context.fw_debug_log_level_fops.pdev = pva; pva_kmd_debugfs_create_file( pva, "fw_debug_log_level", @@ -65,23 +204,6 @@ void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *pva) pva_kmd_debugfs_remove_nodes(pva); } -static uint64_t read_from_buffer_to_user(void *to, uint64_t count, - uint64_t offset, const void *from, - uint64_t available) -{ - if (offset >= available || !count) { - return 0; - } - if (count > available - offset) { - count = available - offset; - } - if (pva_kmd_copy_data_to_user(to, (uint8_t *)from + offset, count)) { - pva_kmd_log_err("failed to copy read buffer to user"); - return 0; - } - return count; -} - static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, uint8_t *out_buffer, uint64_t offset, uint64_t len) @@ -114,8 +236,8 @@ static int64_t print_vpu_stats(struct pva_kmd_tegrastats *kmd_tegra_stats, formatted_len); } -int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data, - uint8_t *out_buffer, uint64_t offset, uint64_t size) +int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, uint64_t size) { struct pva_kmd_tegrastats kmd_tegra_stats; @@ -124,7 +246,6 @@ int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data, kmd_tegra_stats.average_vpu_utilization[0] = 0; kmd_tegra_stats.average_vpu_utilization[1] = 0; - pva_kmd_log_err("Reading VPU stats"); pva_kmd_notify_fw_get_tegra_stats(dev, &kmd_tegra_stats); return print_vpu_stats(&kmd_tegra_stats, out_buffer, offset, size); @@ -161,7 +282,7 @@ int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, } // Copy a single character, ignore the rest - retval = pva_kmd_copy_data_from_user(strbuf, in_buffer, 1); + retval = pva_kmd_copy_data_from_user(strbuf, in_buffer + offset, 1); if (retval != 0u) { pva_kmd_log_err("Failed to copy write buffer from user"); return -1; @@ -181,16 +302,72 @@ int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, return size; } +int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, + uint8_t *out_buffer, uint64_t offset, + uint64_t size) +{ + uint64_t len; + pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock)); + len = read_from_buffer_to_user( + out_buffer, size, offset, + pva->pva_auth->pva_auth_allowlist_path, + safe_addu64(strlen(pva->pva_auth->pva_auth_allowlist_path), + 1u)); + pva_kmd_mutex_unlock(&(pva->pva_auth->allow_list_lock)); + return len; +} + +int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, + const uint8_t *in_buffer, uint64_t offset, + uint64_t size) +{ + char buffer[ALLOWLIST_FILE_LEN]; + unsigned long retval; + + if (size > sizeof(buffer)) { + pva_kmd_log_err_u64( + "Length of allowlist path is too long. 
It must be less than ", + sizeof(buffer)); + return -1; + } + + retval = pva_kmd_copy_data_from_user(buffer, in_buffer, size); + if (retval != 0u) { + pva_kmd_log_err("Failed to copy write buffer from user"); + return -1; + } + + // Replace the last character (the trailing newline) with a null terminator + buffer[safe_subu64(size, 1u)] = '\0'; + + pva_kmd_mutex_lock(&(pva->pva_auth->allow_list_lock)); + pva_kmd_update_allowlist_path(pva, buffer); + pva_kmd_mutex_unlock(&(pva->pva_auth->allow_list_lock)); + + return size; +} + int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data, const uint8_t *in_buffer, uint64_t offset, uint64_t size) { uint32_t log_level; unsigned long retval; - char strbuf[11]; // 10 bytes for the highest 32bit value and another 1 byte for the Null character + size_t copy_size; uint32_t base = 10; + char strbuf[11]; // 10 bytes for the highest 32bit value and another 1 byte for the Null character + + memset(strbuf, 0, sizeof(strbuf)); - retval = pva_kmd_copy_data_from_user(strbuf, in_buffer, sizeof(strbuf)); + if (size == 0) { + pva_kmd_log_err("Write failed, no data provided"); + return -1; + } + + /* Copy minimum of buffer size and input size */ + copy_size = (size < (sizeof(strbuf) - 1)) ? size : (sizeof(strbuf) - 1); + + retval = pva_kmd_copy_data_from_user(strbuf, in_buffer + offset, + copy_size); if (retval != 0u) { pva_kmd_log_err("Failed to copy write buffer from user"); return -1; @@ -198,7 +375,6 @@ int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data, log_level = pva_kmd_strtol(strbuf, base); - pva_kmd_print_str_u64("Setting debug log level to", log_level); pva->fw_debug_log_level = log_level; /* If device is on, busy the device and set the debug log level */ @@ -216,5 +392,5 @@ int64_t update_fw_debug_log_level(struct pva_kmd_device *pva, void *file_data, pva_kmd_device_idle(pva); } err_end: - return strlen(strbuf); + return copy_size; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h index 1e57d8d6..6e8b4269 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_debugfs.h @@ -9,11 +9,6 @@ #define NUM_VPU_BLOCKS 2U -/** - * Maximum length of file operation - */ -#define MAX_FILE_LEN 256U - struct pva_kmd_file_ops { int (*open)(struct pva_kmd_device *dev); int (*release)(struct pva_kmd_device *dev); @@ -30,10 +25,12 @@ struct pva_kmd_debugfs_context { bool vpu_debug; bool vpu_print_enable; char *allowlist_path; - uint32_t profile_level; + uint32_t profiling_level; struct pva_kmd_file_ops vpu_fops; - struct pva_kmd_file_ops allowlist_fops; + struct pva_kmd_file_ops allowlist_ena_fops; + struct pva_kmd_file_ops allowlist_path_fops; struct pva_kmd_file_ops hwpm_fops; + struct pva_kmd_file_ops profiling_level_fops; void *data_hwpm; struct pva_kmd_file_ops vpu_ocd_fops[NUM_VPU_BLOCKS]; struct pva_kmd_fw_profiling_config g_fw_profiling_config; @@ -42,14 +39,20 @@ struct pva_kmd_debugfs_context { void pva_kmd_debugfs_create_nodes(struct pva_kmd_device *dev); void pva_kmd_debugfs_destroy_nodes(struct pva_kmd_device *dev); -int64_t update_vpu_stats(struct pva_kmd_device *dev, void *file_data, - uint8_t *out_buffer, uint64_t offset, uint64_t size); +int64_t get_vpu_stats(struct pva_kmd_device *dev, void *file_data, + uint8_t *out_buffer, uint64_t offset, uint64_t size); int64_t update_vpu_allowlist(struct pva_kmd_device *pva, void *file_data, const uint8_t *in_buffer, uint64_t offset,
uint64_t size); int64_t get_vpu_allowlist_enabled(struct pva_kmd_device *pva, void *file_data, uint8_t *out_buffer, uint64_t offset, uint64_t size); +int64_t update_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, + const uint8_t *in_buffer, uint64_t offset, + uint64_t size); +int64_t get_vpu_allowlist_path(struct pva_kmd_device *pva, void *file_data, + uint8_t *out_buffer, uint64_t offset, + uint64_t size); int64_t update_fw_debug_log_level(struct pva_kmd_device *dev, void *file_data, const uint8_t *in_buffer, uint64_t offset, uint64_t size); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c index 78f1d059..d31c3551 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.c @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. #include "pva_api_types.h" +#include "pva_kmd_shim_init.h" #include "pva_kmd_utils.h" #include "pva_api_cmdbuf.h" #include "pva_api.h" @@ -223,7 +224,12 @@ struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, ASSERT(err == PVA_SUCCESS); pva->is_suspended = false; + +#if PVA_IS_DEBUG == 1 + pva->fw_debug_log_level = 255U; +#else pva->fw_debug_log_level = 0U; +#endif return pva; } @@ -260,6 +266,44 @@ void pva_kmd_device_destroy(struct pva_kmd_device *pva) pva_kmd_free(pva); } +static enum pva_error +pva_kmd_notify_fw_set_profiling_level(struct pva_kmd_device *pva, + uint32_t level) +{ + struct pva_kmd_cmdbuf_builder builder; + struct pva_kmd_submitter *dev_submitter = &pva->submitter; + struct pva_cmd_set_profiling_level *cmd; + uint32_t fence_val; + enum pva_error err; + + err = pva_kmd_submitter_prepare(dev_submitter, &builder); + if (err != PVA_SUCCESS) { + goto err_out; + } + + cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); + ASSERT(cmd != NULL); + pva_kmd_set_cmd_set_profiling_level(cmd, level); + + err = pva_kmd_submitter_submit(dev_submitter, &builder, &fence_val); + if (err != PVA_SUCCESS) { + goto err_out; + } + + err = pva_kmd_submitter_wait(dev_submitter, fence_val, + PVA_KMD_WAIT_FW_POLL_INTERVAL_US, + PVA_KMD_WAIT_FW_TIMEOUT_US); + if (err != PVA_SUCCESS) { + pva_kmd_log_err( + "Waiting for FW timed out when setting profiling level"); + goto err_out; + } + + return PVA_SUCCESS; + +err_out: + return err; +} enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) { enum pva_error err = PVA_SUCCESS; @@ -275,7 +319,7 @@ enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) err = pva_kmd_init_fw(pva); if (err != PVA_SUCCESS) { - goto unlock; + goto poweroff; } /* Reset KMD queue */ pva->dev_queue.queue_header->cb_head = 0; @@ -288,21 +332,32 @@ enum pva_error pva_kmd_device_busy(struct pva_kmd_device *pva) // TODO: need better error handling here err = pva_kmd_shared_buffer_init( pva, PVA_PRIV_CCQ_ID, PVA_KMD_FW_BUF_ELEMENT_SIZE, - PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS, - pva_kmd_process_fw_profiling_message, NULL, NULL); + PVA_KMD_FW_PROFILING_BUF_NUM_ELEMENTS, NULL, NULL); if (err != PVA_SUCCESS) { pva_kmd_log_err_u64( "pva kmd buffer initialization failed for interface ", PVA_PRIV_CCQ_ID); - goto unlock; + goto deinit_fw; } pva_kmd_notify_fw_enable_profiling(pva); + /* Set FW debug log level */ pva_kmd_notify_fw_set_debug_log_level(pva, pva->fw_debug_log_level); + + // If the user had set profiling level before power-on, send the update to FW + 
pva_kmd_notify_fw_set_profiling_level( + pva, pva->debugfs_context.profiling_level); } pva->refcount = safe_addu32(pva->refcount, 1U); + pva_kmd_mutex_unlock(&pva->powercycle_lock); + return PVA_SUCCESS; + +deinit_fw: + pva_kmd_deinit_fw(pva); +poweroff: + pva_kmd_power_off(pva); unlock: pva_kmd_mutex_unlock(&pva->powercycle_lock); return err; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h index ffa161df..dedcab7c 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device.h @@ -20,6 +20,10 @@ #include "pva_kmd_fw_debug.h" #include "pva_kmd_constants.h" #include "pva_kmd_debugfs.h" +#include "pva_kmd_co.h" + +#define NV_PVA0_CLASS_ID 0xF1 +#define NV_PVA1_CLASS_ID 0xF2 struct pva_syncpt_rw_info { /** Dont switch order since syncpt_id and syncpt_iova is prefilled during kmd boot @@ -138,6 +142,9 @@ struct pva_kmd_device { struct pva_vpu_auth *pva_auth; bool is_suspended; + + /** Carveout info for FW */ + struct pva_co_info fw_carveout; }; struct pva_kmd_device *pva_kmd_device_create(enum pva_chip_id chip_id, @@ -161,4 +168,13 @@ void pva_kmd_send_queue_info_by_ccq(struct pva_kmd_device *pva, struct pva_kmd_queue *queue); bool pva_kmd_device_maybe_on(struct pva_kmd_device *pva); + +static inline uint32_t pva_kmd_get_device_class_id(struct pva_kmd_device *pva) +{ + if (pva->device_index == 0) { + return NV_PVA0_CLASS_ID; + } else { + return NV_PVA1_CLASS_ID; + } +} #endif // PVA_KMD_DEVICE_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device_memory_pool.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device_memory_pool.h deleted file mode 100644 index 84d8e871..00000000 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_device_memory_pool.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ - -#ifndef PVA_KMD_DEVICE_MEMORY_POOL_H -#define PVA_KMD_DEVICE_MEMORY_POOL_H -#include "pva_api_types.h" - -struct pva_kmd_device; - -struct pva_kmd_devmem_view { - uint64_t iova; - void *va; -}; - -struct pva_kmd_devmem_pool { -}; - -enum pva_error pva_kmd_devmem_pool_init(struct pva_kmd_device *dev, - uint32_t smmu_context_id, - uint32_t block_size, - uint32_t alloc_step, - struct pva_kmd_devmem_pool *pool); - -enum pva_error pva_kmd_devmem_pool_acquire(struct pva_kmd_devmem_pool *pool, - struct pva_kmd_devmem_view *view); - -enum pva_error pva_kmd_devmem_pool_release(struct pva_kmd_devmem_pool *pool, - struct pva_kmd_devmem_view *view); - -enum pva_error pva_kmd_devmem_pool_deinit(struct pva_kmd_devmem_pool *pool); - -#endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c index 7333b9fe..ebc79965 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ #include "pva_kmd_dma_cfg.h" #include "pva_utils.h" #include "pva_kmd_resource_table.h" @@ -26,8 +27,8 @@ static void trace_dma_channels(struct pva_dma_config const *dma_config, uint8_t *desc_to_ch) { uint32_t ch_index; - struct pva_dma_config_header const *cfg_hdr = &dma_config->header; - struct pva_dma_channel *channel; + const struct pva_dma_config_header *cfg_hdr = &dma_config->header; + const struct pva_dma_channel *channel; uint32_t num_descs = dma_config->header.num_descriptors; for (ch_index = 0; ch_index < cfg_hdr->num_channels; ch_index++) { @@ -51,7 +52,8 @@ static void trace_dma_channels(struct pva_dma_config const *dma_config, enum pva_error pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, - void *dma_config_payload, uint32_t dma_config_size, + const struct pva_ops_dma_config_register *dma_cfg_hdr, + uint32_t dma_config_size, struct pva_kmd_dma_resource_aux *dma_aux, void *fw_dma_cfg, uint32_t *out_fw_fetch_size) { @@ -75,7 +77,7 @@ pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, access_sizes, 0, (PVA_MAX_NUM_DMA_DESC * sizeof(struct pva_kmd_dma_access))); - err = pva_kmd_parse_dma_config(dma_config_payload, dma_config_size, + err = pva_kmd_parse_dma_config(dma_cfg_hdr, dma_config_size, &dma_config, &resource_table->pva->hw_consts); if (err != PVA_SUCCESS) { diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h index b9205873..d349bf30 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + #ifndef PVA_KMD_DMA_CFG_H #define PVA_KMD_DMA_CFG_H @@ -12,6 +13,18 @@ /* Right shift value for moving GOB offset value extracted from surface address to LSB */ #define PVA_DMA_BL_GOB_OFFSET_MASK_RSH 6U +#define MAX_DESC_ID 0x3FU + +enum pva_dma_frame_rep { + REPLICATION_NONE = 0, + REPLICATION_TWO_WAY, + REPLICATION_FOUR_WAY, + REPLICATION_EIGHT_WAY, + REPLICATION_SIXTEEN_WAY, + REPLICATION_THIRTYTWO_WAY, + REPLICATION_FULL +}; + struct pva_kmd_dma_access_entry { int64_t start_addr; int64_t end_addr; @@ -47,7 +60,8 @@ struct pva_kmd_dma_resource_aux { }; enum pva_error -pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size, +pva_kmd_parse_dma_config(const struct pva_ops_dma_config_register *dma_cfg_hdr, + uint32_t dma_config_size, struct pva_dma_config *out_cfg, struct pva_kmd_hw_constants const *hw_consts); @@ -123,7 +137,8 @@ void pva_kmd_write_fw_dma_config(struct pva_dma_config const *dma_cfg, */ enum pva_error pva_kmd_load_dma_config(struct pva_kmd_resource_table *resource_table, - void *dma_config, uint32_t dma_config_size, + const struct pva_ops_dma_config_register *dma_cfg_hdr, + uint32_t dma_config_size, struct pva_kmd_dma_resource_aux *dma_aux, void *fw_dma_cfg, uint32_t *out_fw_fetch_size); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c index 7deea4a3..75dd3370 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_binding.c @@ -307,7 +307,7 @@ bind_static_vmem_slot(struct pva_dma_config_resource *dma_config, desc->frda = ((uint16_t)(buffer_addr >> 6U) + desc->frda) & - 0x3FFF; + 0x7FFF; } } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c index bf988991..a7f0e71c 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_validate.c @@ -9,6 +9,7 @@ #include "pva_api_dma.h" #include "pva_kmd_device.h" #include "pva_math_utils.h" +#include "pva_utils.h" struct pva_fw_dma_reloc_slot_info { struct pva_fw_dma_slot *slots; @@ -21,13 +22,31 @@ struct pva_fw_dma_reloc_slots { struct pva_fw_dma_reloc_slot_info static_slot; }; +static enum pva_error check_replication(struct pva_dma_config const *out_cfg, + struct pva_dma_channel const *channel) +{ + enum pva_error err = PVA_SUCCESS; + switch (channel->ch_rep_factor) { + case (uint8_t)REPLICATION_NONE: + case (uint8_t)REPLICATION_FULL: + break; + default: { + pva_kmd_log_err("Invalid Channel Replication Factor"); + err = PVA_INVAL; + } break; + } + + return err; +} + static enum pva_error validate_channel_mapping(struct pva_dma_config const *out_cfg, struct pva_kmd_hw_constants const *hw_consts) { - struct pva_dma_channel *channel; + const struct pva_dma_channel *channel; struct pva_dma_config_header const *cfg_hdr = &out_cfg->header; pva_math_error math_err = MATH_OP_SUCCESS; + enum pva_error err = PVA_SUCCESS; for (uint8_t i = 0U; i < cfg_hdr->num_channels; i++) { channel = &out_cfg->channels[i]; @@ -47,6 +66,11 @@ validate_channel_mapping(struct pva_dma_config const *out_cfg, pva_kmd_log_err("ERR: Invalid ADB Buff Size or Offset"); return PVA_INVAL; } + err = check_replication(out_cfg, channel); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Invalid Channel Replication Factor"); + return err; + } } if (math_err != 
MATH_OP_SUCCESS) { pva_kmd_log_err("validate_channel_mapping math error"); return PVA_INVAL; } @@ -56,7 +80,7 @@ validate_channel_mapping(struct pva_dma_config const *out_cfg, return PVA_SUCCESS; } -static enum pva_error validate_padding(struct pva_dma_descriptor *desc) +static enum pva_error validate_padding(const struct pva_dma_descriptor *desc) { if ((desc->px != 0U) && (desc->px >= desc->tx)) { return PVA_INVAL; @@ -69,7 +93,7 @@ static enum pva_error validate_padding(struct pva_dma_descriptor *desc) return PVA_SUCCESS; } -static bool is_valid_vpu_trigger_mode(struct pva_dma_descriptor *desc) +static bool is_valid_vpu_trigger_mode(const struct pva_dma_descriptor *desc) { bool valid = true; if (desc->trig_event_mode != 0U) { @@ -119,7 +143,7 @@ static bool is_valid_vpu_trigger_mode(struct pva_dma_descriptor *desc) return valid; } -static bool validate_src_dst_adv_val(struct pva_dma_descriptor *desc, +static bool validate_src_dst_adv_val(const struct pva_dma_descriptor *desc, bool relax_dim3_check) { uint8_t is_any_rpt_zero = 0U; @@ -150,7 +174,7 @@ static bool validate_src_dst_adv_val(struct pva_dma_descriptor *desc, } static enum pva_error -validate_dma_desc_trans_cntl2(struct pva_dma_descriptor *desc) +validate_dma_desc_trans_cntl2(const struct pva_dma_descriptor *desc) { if ((desc->prefetch_enable != 0U) && ((desc->tx == 0U) || (desc->ty == 0U) || @@ -162,7 +186,7 @@ validate_dma_desc_trans_cntl2(struct pva_dma_descriptor *desc) } static enum pva_error -validate_descriptor(struct pva_dma_descriptor *desc, +validate_descriptor(const struct pva_dma_descriptor *desc, struct pva_dma_config_header const *cfg_hdr) { enum pva_error err = PVA_SUCCESS; @@ -202,10 +226,29 @@ validate_descriptor(struct pva_dma_descriptor *desc, return PVA_SUCCESS; } +struct pva_kmd_offset_pairs { + uint32_t start; + uint32_t end; +}; + +#define PVA_KMD_DMA_CONFIG_ARRAY_COUNT 4U + static bool -is_dma_config_header_valid(struct pva_dma_config_header const *cfg_hdr, +is_dma_config_header_valid(struct pva_ops_dma_config_register const *ops_hdr, + uint32_t dma_config_size, struct pva_kmd_hw_constants const *hw_consts) { + struct pva_kmd_offset_pairs offsets[PVA_KMD_DMA_CONFIG_ARRAY_COUNT]; + struct pva_dma_config_header const *cfg_hdr; + pva_math_error math_err = MATH_OP_SUCCESS; + + if (dma_config_size < sizeof(*ops_hdr)) { + pva_kmd_log_err("DMA configuration too small"); + return false; + } + + cfg_hdr = &ops_hdr->dma_config_header; + if (((cfg_hdr->base_descriptor + cfg_hdr->num_descriptors) > hw_consts->n_dma_descriptors) || ((cfg_hdr->base_channel + cfg_hdr->num_channels) > @@ -217,61 +260,98 @@ is_dma_config_header_valid(struct pva_dma_config_header const *cfg_hdr, (cfg_hdr->base_channel == 0U)) { return false; } + + offsets[0].start = ops_hdr->channels_offset; + offsets[0].end = addu32( + ops_hdr->channels_offset, + align8_u32(mulu32(cfg_hdr->num_channels, + sizeof(struct pva_dma_channel), &math_err), + &math_err), + &math_err); + + offsets[1].start = ops_hdr->descriptors_offset; + offsets[1].end = addu32( + ops_hdr->descriptors_offset, + align8_u32(mulu32(cfg_hdr->num_descriptors, + sizeof(struct pva_dma_descriptor), &math_err), + &math_err), + &math_err); + + offsets[2].start = ops_hdr->hwseq_words_offset; + offsets[2].end = addu32(ops_hdr->hwseq_words_offset, + align8_u32(mulu32(cfg_hdr->num_hwseq_words, + sizeof(uint32_t), &math_err), + &math_err), + &math_err); + + offsets[3].start = ops_hdr->static_bindings_offset; + offsets[3].end = + addu32(ops_hdr->static_bindings_offset, +
align8_u32(mulu32(cfg_hdr->num_static_slots, + sizeof(struct pva_dma_static_binding), + &math_err), + &math_err), + &math_err); + + if (math_err != MATH_OP_SUCCESS) { + pva_kmd_log_err("DMA config field offset math error"); + return false; + } + //Validate: + // 1. All start offsets are aligned to 8 bytes + // 2. All end offsets are within the dma_config_size + // Note: We do not check if the ranges overlap because we do not modify the buffer in place. + for (uint32_t i = 0; i < PVA_KMD_DMA_CONFIG_ARRAY_COUNT; i++) { + if (offsets[i].start % 8 != 0) { + pva_kmd_log_err( + "DMA config field offset is not aligned to 8 bytes"); + return false; + } + if (offsets[i].end > dma_config_size) { + pva_kmd_log_err("DMA config field is out of bounds"); + return false; + } + } + return true; } enum pva_error -pva_kmd_parse_dma_config(void *dma_config, uint32_t dma_config_size, +pva_kmd_parse_dma_config(const struct pva_ops_dma_config_register *ops_hdr, + uint32_t dma_config_size, struct pva_dma_config *out_cfg, struct pva_kmd_hw_constants const *hw_consts) { - struct pva_dma_config_header const *cfg_hdr = dma_config; - uintptr_t offset = 0; - - if (dma_config_size < sizeof(*cfg_hdr)) { - pva_kmd_log_err("DMA configuration too small"); - return PVA_INVAL; - } - - out_cfg->header = *cfg_hdr; - if (!(is_dma_config_header_valid(cfg_hdr, hw_consts))) { + if (!(is_dma_config_header_valid(ops_hdr, dma_config_size, + hw_consts))) { pva_kmd_log_err("Invalid PVA DMA Configuration Header"); return PVA_INVAL; } - offset += PVA_ALIGN8(sizeof(*cfg_hdr)); + out_cfg->header = ops_hdr->dma_config_header; - out_cfg->hwseq_words = pva_offset_pointer(dma_config, offset); - offset += PVA_ALIGN8(cfg_hdr->num_hwseq_words * - sizeof(*out_cfg->hwseq_words)); + out_cfg->hwseq_words = + pva_offset_const_ptr(ops_hdr, ops_hdr->hwseq_words_offset); - out_cfg->channels = pva_offset_pointer(dma_config, offset); - offset += - PVA_ALIGN8(cfg_hdr->num_channels * sizeof(*out_cfg->channels)); + out_cfg->channels = + pva_offset_const_ptr(ops_hdr, ops_hdr->channels_offset); - out_cfg->descriptors = pva_offset_pointer(dma_config, offset); - offset += PVA_ALIGN8(cfg_hdr->num_descriptors * - sizeof(*out_cfg->descriptors)); + out_cfg->descriptors = + pva_offset_const_ptr(ops_hdr, ops_hdr->descriptors_offset); - out_cfg->static_bindings = pva_offset_pointer(dma_config, offset); - offset += PVA_ALIGN8(cfg_hdr->num_static_slots * - sizeof(*out_cfg->static_bindings)); - - if (offset > dma_config_size) { - pva_kmd_log_err("DMA configuration is smaller than expected"); - return PVA_INVAL; - } + out_cfg->static_bindings = + pva_offset_const_ptr(ops_hdr, ops_hdr->static_bindings_offset); return PVA_SUCCESS; } static enum pva_error -validate_descriptors(struct pva_dma_config const *dma_config) +validate_descriptors(const struct pva_dma_config *dma_config) { uint32_t i = 0U; enum pva_error err = PVA_SUCCESS; - struct pva_dma_config_header const *cfg_hdr = &dma_config->header; - struct pva_dma_descriptor *desc; + const struct pva_dma_config_header *cfg_hdr = &dma_config->header; + const struct pva_dma_descriptor *desc; for (i = 0; i < cfg_hdr->num_descriptors; i++) { if (pva_is_reserved_desc(i)) { @@ -474,7 +554,7 @@ static void count_relocs(struct pva_dma_config const *dma_cfg, uint16_t num_dyn_slots) { uint8_t i; - struct pva_dma_descriptor *desc; + const struct pva_dma_descriptor *desc; for (i = 0U; i < dma_cfg->header.num_descriptors; i++) { if (pva_is_reserved_desc(i)) { @@ -543,14 +623,14 @@ static void handle_reloc(uint16_t slot, uint8_t 
transfer_mode, } } -static void write_relocs(struct pva_dma_config const *dma_cfg, +static void write_relocs(const struct pva_dma_config *dma_cfg, struct pva_kmd_dma_access const *access_sizes, struct pva_fw_dma_reloc_slots *rel_info, uint8_t const *desc_to_ch) { uint32_t i; uint16_t start_idx = 0U; - struct pva_dma_descriptor *desc = NULL; + const struct pva_dma_descriptor *desc = NULL; uint8_t ch_index = 0U; for (i = 0U; i < rel_info->dyn_slot.num_slots; i++) { @@ -587,7 +667,7 @@ static void write_relocs(struct pva_dma_config const *dma_cfg, } static enum pva_error -validate_descriptor_tile_and_padding(struct pva_dma_descriptor *desc, +validate_descriptor_tile_and_padding(const struct pva_dma_descriptor *desc, bool is_dst) { enum pva_error err = PVA_SUCCESS; @@ -608,13 +688,13 @@ validate_descriptor_tile_and_padding(struct pva_dma_descriptor *desc, return PVA_SUCCESS; } -static enum pva_error get_access_size(struct pva_dma_descriptor *desc, +static enum pva_error get_access_size(const struct pva_dma_descriptor *desc, struct pva_kmd_dma_access_entry *entry, bool is_dst, struct pva_kmd_dma_access_entry *dst2) { - struct pva_dma_transfer_attr *attr = NULL; + const struct pva_dma_transfer_attr *attr = NULL; uint32_t tx = 0U; uint32_t ty = 0U; uint64_t tile_size = 0U; @@ -727,7 +807,7 @@ pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg, uint64_t *hw_dma_descs_mask) { uint32_t i; - struct pva_dma_descriptor *desc = NULL; + const struct pva_dma_descriptor *desc = NULL; enum pva_error err = PVA_SUCCESS; bool skip_swseq_size_compute = false; @@ -764,7 +844,6 @@ pva_kmd_compute_dma_access(struct pva_dma_config const *dma_cfg, //Calculate dst_size err = get_access_size(desc, &access_sizes[i].dst, true, &access_sizes[i].dst2); - if (err != PVA_SUCCESS) { goto out; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c index a7b4096d..33b67aa9 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_dma_cfg_write.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
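/*
 * A producer-side sketch of the serialized layout that
 * is_dma_config_header_valid() above expects: each array sits at an
 * 8-byte-aligned offset from the start of the op and ends within
 * dma_config_size. The helper and SKETCH_ALIGN8 macro are hypothetical;
 * only struct pva_ops_dma_config_register and its fields come from this
 * patch, and overflow checking (addu32/mulu32 on the KMD side) is elided.
 */
#define SKETCH_ALIGN8(x) (((x) + 7U) & ~7U)

static uint32_t
sketch_dma_config_op_layout(struct pva_ops_dma_config_register *op)
{
	const struct pva_dma_config_header *h = &op->dma_config_header;
	uint32_t off = SKETCH_ALIGN8((uint32_t)sizeof(*op));

	op->channels_offset = off;
	off += SKETCH_ALIGN8(h->num_channels *
			     (uint32_t)sizeof(struct pva_dma_channel));

	op->descriptors_offset = off;
	off += SKETCH_ALIGN8(h->num_descriptors *
			     (uint32_t)sizeof(struct pva_dma_descriptor));

	op->hwseq_words_offset = off;
	off += SKETCH_ALIGN8(h->num_hwseq_words * (uint32_t)sizeof(uint32_t));

	op->static_bindings_offset = off;
	off += SKETCH_ALIGN8(h->num_static_slots *
			     (uint32_t)sizeof(struct pva_dma_static_binding));

	op->header.opcode = PVA_OPS_OPCODE_DMA_CONFIG_REGISTER;
	op->header.size = off; /* total op size, a multiple of 8 bytes */
	return off;
}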
+ #include "pva_kmd_resource_table.h" #include "pva_kmd_device_memory.h" #include "pva_api.h" @@ -131,7 +132,7 @@ static void write_dma_descriptor(struct pva_dma_descriptor const *desc, /* DMA_DESC_PY */ fw_desc->py = desc->py; /* DMA_DESC_FRDA */ - fw_desc->frda = ((desc->dst2_offset >> 6U) & 0x3FFF); + fw_desc->frda = ((desc->dst2_offset >> 6U) & 0x7FFF); /* DMA_DESC_NDTM_CNTL0 */ fw_desc->cb_ext = (((desc->src.cb_start >> 16) & 0x1) << 0) | diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h index 65592ca6..4a674af7 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_executable.h @@ -53,7 +53,7 @@ pva_kmd_get_symbol_with_type(struct pva_kmd_exec_symbol_table *symbol_table, } enum pva_error -pva_kmd_load_executable(void *executable_data, uint32_t executable_size, +pva_kmd_load_executable(const void *executable_data, uint32_t executable_size, struct pva_kmd_device *pva, uint8_t dma_smmu_id, struct pva_kmd_exec_symbol_table *out_symbol_table, struct pva_kmd_device_memory **out_metainfo, diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c index 0750ce9a..0fa3bea5 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_debug.c @@ -31,7 +31,6 @@ enum pva_error pva_kmd_notify_fw_set_debug_log_level(struct pva_kmd_device *pva, ASSERT(cmd != NULL); pva_kmd_set_cmd_set_debug_log_level(cmd, log_level); - pva_kmd_print_str_u64("set debug log level cmd:", cmd->log_level); err = pva_kmd_submitter_submit(submitter, &builder, &fence_val); if (err != PVA_SUCCESS) { diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c index b8cd4149..7327a291 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.c @@ -238,15 +238,12 @@ static void decode_and_print_event(unsigned long walltime, } } -enum pva_error pva_kmd_process_fw_profiling_message(void *context, - uint8_t interface, - uint8_t *element) +enum pva_error pva_kmd_process_fw_event(struct pva_kmd_device *pva, + uint8_t *data, uint32_t data_size) { - struct pva_kmd_device *pva = (struct pva_kmd_device *)context; - uint64_t timestamp = 0; char msg_string[200] = { '\0' }; - struct pva_fw_event_message message; + struct pva_fw_event_message event_header; static uint64_t prev_walltime = 0U; uint64_t relative_time = 0U; @@ -255,8 +252,14 @@ enum pva_error pva_kmd_process_fw_profiling_message(void *context, static const uint64_t r5_cycle_duration = 1000000000000 / r5_freq; uint64_t walltime = 0U; // in nanoseconds - memcpy(&message, element, sizeof(message)); - memcpy(×tamp, &element[sizeof(message)], + if (data_size < + (sizeof(event_header) + + pva->debugfs_context.g_fw_profiling_config.timestamp_size)) { + return PVA_INVAL; + } + + memcpy(&event_header, data, sizeof(event_header)); + memcpy(×tamp, &data[sizeof(event_header)], pva->debugfs_context.g_fw_profiling_config.timestamp_size); if (pva->debugfs_context.g_fw_profiling_config.timestamp_type == @@ -271,7 +274,7 @@ enum pva_error pva_kmd_process_fw_profiling_message(void *context, relative_time = (prev_walltime > walltime) ? 
0U : safe_subu64(walltime, prev_walltime); - decode_and_print_event(walltime, relative_time, message, + decode_and_print_event(walltime, relative_time, event_header, &msg_string[0]); pva_kmd_print_str(msg_string); prev_walltime = walltime; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h index 47d67682..9ea337e5 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_fw_profiler.h @@ -18,9 +18,8 @@ void pva_kmd_device_init_profiler(struct pva_kmd_device *pva); void pva_kmd_device_deinit_profiler(struct pva_kmd_device *pva); -enum pva_error pva_kmd_process_fw_profiling_message(void *context, - uint8_t interface, - uint8_t *element); +enum pva_error pva_kmd_process_fw_event(struct pva_kmd_device *pva, + uint8_t *data, uint32_t data_size); enum pva_error pva_kmd_notify_fw_enable_profiling(struct pva_kmd_device *pva); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c index ff2b8526..99c87cbc 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.c @@ -7,7 +7,6 @@ //TODO: Use nv_speculate barrier //#include "nv_speculation_barrier.h" -#define MAX_DESC_ID 0x3FU #define HWSEQ_MIN_WORDS 5U static inline const void *read_hwseq_blob(struct pva_hwseq_buffer *buffer, @@ -287,7 +286,8 @@ check_vmem_setup(struct pva_dma_transfer_attr const *attr, * - PVA_SUCCESS if valid source/destination pair is found * - PVA_INVAL if invalid source/destination pair is found */ -static enum pva_error validate_xfer_mode(struct pva_dma_descriptor *dma_desc) +static enum pva_error +validate_xfer_mode(const struct pva_dma_descriptor *dma_desc) { enum pva_error err = PVA_SUCCESS; @@ -386,8 +386,8 @@ static enum pva_error validate_dst_vmem(struct pva_hwseq_priv *hwseq, int64_t end_addr = 0LL; int64_t num_bytes = 0LL; int64_t offset = 0LL; - struct pva_dma_descriptor *head_desc = hwseq->head_desc; - struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; + const struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; uint8_t head_desc_id = get_head_desc_did(hwseq); pva_math_error math_err = MATH_OP_SUCCESS; @@ -518,8 +518,8 @@ static enum pva_error validate_src_vmem(struct pva_hwseq_priv *hwseq, uint32_t *vmem_tile_count, bool has_dim3) { - struct pva_dma_descriptor *head_desc = hwseq->head_desc; - struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; + const struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; uint8_t head_desc_id = get_head_desc_did(hwseq); uint32_t tx = 0U; uint32_t ty = 0U; @@ -840,8 +840,9 @@ swap_frame_boundaries(struct pva_hwseq_frame_info *frame_info) * - PVA_SUCCESS if above checks pass * - PVA_INVAL if any of the above checks fail */ -static enum pva_error check_padding_tiles(struct pva_dma_descriptor *head_desc, - struct pva_dma_descriptor *tail_desc) +static enum pva_error +check_padding_tiles(const struct pva_dma_descriptor *head_desc, + const struct pva_dma_descriptor *tail_desc) { if ((head_desc->px != 0U) || (head_desc->py != 0U) || (head_desc->desc_reload_enable != 0U)) { @@ -1053,7 +1054,7 @@ static enum pva_error check_tile_offset(struct pva_hwseq_priv *hwseq) static void 
get_sequencing_and_dim3(struct pva_hwseq_priv *hwseq, bool *sequencing_to_vmem, bool *has_dim3) { - struct pva_dma_descriptor *head_desc = hwseq->head_desc; + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; *sequencing_to_vmem = (head_desc->dst.transfer_mode == (uint8_t)PVA_DMA_TRANS_MODE_VMEM); // Check if this a 3D tensor transfer. @@ -1102,8 +1103,8 @@ validate_dma_boundaries(struct pva_hwseq_priv *hwseq, uint16_t frame_line_pitch = 0U; int64_t frame_buffer_offset = 0; struct pva_hwseq_frame_info frame_info = { 0 }; - struct pva_dma_descriptor *head_desc = hwseq->head_desc; - struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; + const struct pva_dma_descriptor *head_desc = hwseq->head_desc; + const struct pva_dma_descriptor *tail_desc = hwseq->tail_desc; err = check_tile_offset(hwseq); if (err != PVA_SUCCESS) { @@ -1493,8 +1494,8 @@ static enum pva_error validate_hwseq_blob(struct pva_hwseq_priv *hwseq_info, } static enum pva_error -validate_channel_accesses(struct pva_dma_channel const *ch, - struct pva_dma_config_header const *header, +validate_channel_accesses(const struct pva_dma_channel *ch, + const struct pva_dma_config_header *header, enum pva_hw_gen hw_gen, struct hw_seq_blob_entry *entry) { @@ -1530,7 +1531,7 @@ enum pva_error validate_hwseq(struct pva_dma_config const *dma_config, uint32_t i = 0U; struct pva_hwseq_priv hwseq_info = { 0 }; enum pva_error err = PVA_SUCCESS; - struct pva_dma_channel *ch = NULL; + const struct pva_dma_channel *ch = NULL; struct hw_seq_blob_entry entries[PVA_MAX_NUM_DMA_CHANNELS] = { 0 }; uint8_t num_hwseqs = 0U; uint8_t num_channels = dma_config->header.num_channels; @@ -1553,20 +1554,17 @@ enum pva_error validate_hwseq(struct pva_dma_config const *dma_config, } for (i = 0U; i < num_hwseqs; i++) { - uint32_t start_address = entries[i].hwseq_start; - uint32_t end_address = entries[i].hwseq_end + 1U; - uint32_t curr_offset = start_address << 2U; - uint32_t size = 0U; + uint32_t start_index = entries[i].hwseq_start; + uint32_t end_index = entries[i].hwseq_end + 1U; + uint32_t curr_offset = start_index << 2U; + uint32_t len = 0U; //Populate hwseq blob hwseq_info.blob.data = (uint8_t *)((uintptr_t)(dma_config->hwseq_words) + (curr_offset)); - size = safe_subu32(end_address, start_address); - if (size > (hw_consts->n_hwseq_words >> 2U)) { - return PVA_ERR_HWSEQ_INVALID; - } - hwseq_info.blob.bytes_left = size << 2U; + len = safe_subu32(end_index, start_index); + hwseq_info.blob.bytes_left = (len << 2U); err = validate_hwseq_blob(&hwseq_info, &entries[i], hw_dma_descs_mask); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h index 3da9b0c1..5af47238 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_hwseq_validate.h @@ -220,7 +220,7 @@ struct hw_seq_blob_entry { /** * TODO: Separate out pva_hwseq_priv to be more modular - * + * * Items in pva_hwseq_main * - dma_config * - hw_gen @@ -299,11 +299,11 @@ struct pva_hwseq_priv { /** * Pointer to the Head Descriptor of type \ref nvpva_dma_descriptor in the HW Sequencer */ - struct pva_dma_descriptor *head_desc; + const struct pva_dma_descriptor *head_desc; /** * Pointer to the Tail Descriptor of type \ref nvpva_dma_descriptor in the HW Sequencer */ - struct pva_dma_descriptor *tail_desc; + const struct pva_dma_descriptor *tail_desc; /** * DMA Descriptor information obtained from HW Sequencer Blob of 
type * \ref pva_dma_hwseq_desc_entry_t diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.c deleted file mode 100644 index e30ca935..00000000 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - -#include "pva_kmd_id_allocator.h" -#include "pva_api_types.h" -#include "pva_kmd_utils.h" - -enum pva_error pva_kmd_id_allocator_init(struct pva_kmd_id_allocator *allocator, - uint32_t base_id, uint32_t n_entries) -{ - enum pva_error err = PVA_SUCCESS; - - allocator->n_entries = n_entries; - allocator->n_free_ids = n_entries; - allocator->n_used_ids = 0; - - // Allocate space for both free and used IDs - allocator->free_ids = pva_kmd_zalloc(sizeof(uint32_t) * n_entries * 2); - if (allocator->free_ids == NULL) { - err = PVA_NOMEM; - goto err_out; - } - - allocator->used_ids = allocator->free_ids + n_entries; - - // Put free IDs in reverse order so that we allocate in ascending order - for (uint32_t i = 0; i < n_entries; i++) { - allocator->free_ids[i] = base_id + n_entries - i - 1; - } - - return PVA_SUCCESS; - -err_out: - return err; -} - -enum pva_error -pva_kmd_id_allocator_deinit(struct pva_kmd_id_allocator *allocator) -{ - pva_kmd_free(allocator->free_ids); - return PVA_SUCCESS; -} - -enum pva_error pva_kmd_alloc_id(struct pva_kmd_id_allocator *allocator, - uint32_t *id) -{ - if (allocator->n_free_ids == 0) { - return PVA_NOENT; - } - - allocator->n_free_ids--; - *id = allocator->free_ids[allocator->n_free_ids]; - - allocator->used_ids[allocator->n_used_ids] = *id; - allocator->n_used_ids++; - - return PVA_SUCCESS; -} - -void pva_kmd_free_id(struct pva_kmd_id_allocator *allocator, uint32_t id) -{ - ASSERT(allocator->n_used_ids > 0); - ASSERT(allocator->n_free_ids < allocator->n_entries); - - allocator->free_ids[allocator->n_free_ids] = id; - allocator->n_free_ids++; - - allocator->n_used_ids--; -} \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.h deleted file mode 100644 index 2f2dd311..00000000 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_id_allocator.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ - -#ifndef PVA_KMD_ID_ALLOCATOR_H -#define PVA_KMD_ID_ALLOCATOR_H -#include "pva_api_types.h" - -struct pva_kmd_id_allocator { - uint32_t n_entries; - uint32_t *free_ids; - uint32_t *used_ids; - uint32_t n_free_ids; - uint32_t n_used_ids; -}; - -enum pva_error pva_kmd_id_allocator_init(struct pva_kmd_id_allocator *allocator, - uint32_t base_id, uint32_t n_entries); - -enum pva_error -pva_kmd_id_allocator_deinit(struct pva_kmd_id_allocator *allocator); - -enum pva_error pva_kmd_alloc_id(struct pva_kmd_id_allocator *allocator, - uint32_t *id); - -void pva_kmd_free_id(struct pva_kmd_id_allocator *allocator, uint32_t id); - -#endif /* PVA_KMD_ID_ALLOCATOR_H */ diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c index c4bf9efa..d4047b90 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.c @@ -60,32 +60,3 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len) FAULT("Unknown message type from firmware"); } } - -enum pva_error pva_kmd_handle_msg_resource_unreg(void *context, - uint8_t interface, - uint8_t *element) -{ - // TODO: if the mapping of CCQ_ID to interface is not 1:1, we need to - // find the CCQ_ID/table_id from interface - uint8_t table_id = interface; - struct pva_kmd_device *pva; - struct pva_kmd_context *ctx; - uint32_t resource_id; - - ASSERT(context != NULL); - pva = (struct pva_kmd_device *)context; - ctx = pva_kmd_get_context(pva, table_id); - - ASSERT(ctx != NULL); - ASSERT(element != NULL); - - /* Resource table ID equals context id */ - memcpy(&resource_id, element, sizeof(resource_id)); - - // We do not lock the resource table here because this function is intended - // to be called from the shared buffer processing function which should acquire - // the required lock. - pva_kmd_drop_resource_unsafe(&ctx->ctx_resource_table, resource_id); - - return PVA_SUCCESS; -} diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h index 7df74966..622039f6 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_msg.h @@ -20,9 +20,4 @@ void pva_kmd_handle_hyp_msg(void *pva_dev, uint32_t const *data, uint8_t len); * These messages come from CCQ0 statues registers. */ void pva_kmd_handle_msg(void *pva_dev, uint32_t const *data, uint8_t len); - -// TODO: move to a better location OR consolidate handling of all message types here -enum pva_error pva_kmd_handle_msg_resource_unreg(void *context, - uint8_t interface, - uint8_t *element); #endif // PVA_KMD_MSG_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c index bad5f9dc..8f881e28 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.c @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
#include "pva_kmd_op_handler.h" +#include "pva_api.h" +#include "pva_api_dma.h" +#include "pva_api_types.h" +#include "pva_kmd.h" #include "pva_kmd_resource_table.h" #include "pva_kmd_device_memory.h" #include "pva_kmd_cmdbuf.h" @@ -12,34 +16,39 @@ #include "pva_kmd_vpu_app_auth.h" #include "pva_math_utils.h" -struct pva_kmd_buffer { +struct pva_kmd_ops_buffer { void const *base; uint32_t offset; uint32_t size; }; /* Offset will always be multiple of 8 bytes */ -static void incr_offset(struct pva_kmd_buffer *buf, uint32_t incr) +static void incr_offset(struct pva_kmd_ops_buffer *buf, uint32_t incr) { buf->offset = safe_addu32(buf->offset, incr); buf->offset = safe_pow2_roundup_u32(buf->offset, (uint32_t)sizeof(uint64_t)); } -static bool access_ok(struct pva_kmd_buffer const *buf, uint32_t size) +static bool access_ok(struct pva_kmd_ops_buffer const *buf, uint32_t size) { return safe_addu32(buf->offset, size) <= buf->size; } -static void *read_data(struct pva_kmd_buffer *buf, uint32_t size) +static const void *peek_data(struct pva_kmd_ops_buffer *buf) { - void *data = (void *)((uint8_t *)buf->base + buf->offset); + return (const void *)((uint8_t *)buf->base + buf->offset); +} + +static const void *consume_data(struct pva_kmd_ops_buffer *buf, uint32_t size) +{ + const void *data = peek_data(buf); incr_offset(buf, size); return data; } -static void write_data(struct pva_kmd_buffer *buf, void const *data, - uint32_t size) +static void produce_data(struct pva_kmd_ops_buffer *buf, void const *data, + uint32_t size) { memcpy((uint8_t *)buf->base + buf->offset, data, size); incr_offset(buf, size); @@ -47,35 +56,32 @@ static void write_data(struct pva_kmd_buffer *buf, void const *data, static enum pva_error pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, + const void *input_buffer, uint32_t size, + struct pva_kmd_ops_buffer *out_buffer, struct pva_kmd_cmdbuf_builder *cmdbuf_builder) { enum pva_error err = PVA_SUCCESS; - struct pva_kmd_memory_register_in_args *args; - struct pva_kmd_register_out_args out_args = { 0 }; + const struct pva_ops_memory_register *args; + struct pva_ops_response_register out_args = { 0 }; struct pva_kmd_device_memory *dev_mem; struct pva_cmd_update_resource_table *update_cmd; struct pva_resource_entry entry = { 0 }; uint8_t smmu_ctx_id; - uint32_t resource_id = 0; - if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) { + if (!access_ok(out_buffer, sizeof(struct pva_ops_response_register))) { return PVA_INVAL; } - if (!access_ok(in_buffer, - sizeof(struct pva_kmd_memory_register_in_args))) { - err = PVA_INVAL; - goto err_out; + if (size != sizeof(struct pva_ops_memory_register)) { + pva_kmd_log_err("Memory register size is not correct"); + return PVA_INVAL; } - args = read_data(in_buffer, - sizeof(struct pva_kmd_memory_register_in_args)); + args = (const struct pva_ops_memory_register *)input_buffer; - dev_mem = pva_kmd_device_memory_acquire(args->memory_handle, - args->offset, args->size, ctx); + dev_mem = pva_kmd_device_memory_acquire(args->import_id, args->offset, + args->size, ctx); if (dev_mem == NULL) { err = PVA_NOMEM; goto err_out; @@ -118,8 +124,9 @@ pva_kmd_op_memory_register_async(struct pva_kmd_context *ctx, out_args.error = PVA_SUCCESS; out_args.resource_id = resource_id; - write_data(out_buffer, &out_args, sizeof(out_args)); - return err; + produce_data(out_buffer, &out_args, sizeof(out_args)); + return PVA_SUCCESS; + free_cmdbuf: 
pva_kmd_cmdbuf_builder_cancel(cmdbuf_builder); free_dram_buffer_resource: @@ -130,55 +137,57 @@ release: pva_kmd_device_memory_free(dev_mem); err_out: out_args.error = err; - write_data(out_buffer, &out_args, sizeof(out_args)); - return err; + produce_data(out_buffer, &out_args, sizeof(out_args)); + return PVA_SUCCESS; } - static enum pva_error pva_kmd_op_executable_register_async( - struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, + struct pva_kmd_context *ctx, const void *input_buffer, uint32_t size, + struct pva_kmd_ops_buffer *out_buffer, struct pva_kmd_cmdbuf_builder *cmdbuf_builder) { enum pva_error err = PVA_SUCCESS; - struct pva_kmd_executable_register_in_args *args; - struct pva_kmd_exec_register_out_args out_args = { 0 }; + struct pva_ops_executable_register *args; + struct pva_ops_response_executable_register out_args = { 0 }; struct pva_cmd_update_resource_table *update_cmd; struct pva_resource_entry entry = { 0 }; struct pva_kmd_resource_record *rec; uint32_t num_symbols = 0; - void *exec_data; + const void *exec_data; uint32_t resource_id = 0; if (!access_ok(out_buffer, - sizeof(struct pva_kmd_exec_register_out_args))) { + sizeof(struct pva_ops_response_executable_register))) { + pva_kmd_log_err("Response buffer too small"); return PVA_INVAL; } - if (!access_ok(in_buffer, - sizeof(struct pva_kmd_executable_register_in_args))) { - err = PVA_INVAL; - goto err_out; + if (size < sizeof(struct pva_ops_executable_register)) { + pva_kmd_log_err("Executable register size is not correct"); + return PVA_INVAL; } - args = read_data(in_buffer, - sizeof(struct pva_kmd_executable_register_in_args)); - - if (!access_ok(in_buffer, args->size)) { - err = PVA_INVAL; - goto err_out; + args = (struct pva_ops_executable_register *)input_buffer; + if (args->exec_size + sizeof(struct pva_ops_executable_register) > + size) { + pva_kmd_log_err("Executable register payload size too small"); + return PVA_INVAL; } - exec_data = read_data(in_buffer, args->size); - - err = pva_kmd_verify_exectuable_hash(ctx->pva, (uint8_t *)exec_data, - args->size); + exec_data = (uint8_t *)(pva_offset_const_ptr( + input_buffer, sizeof(struct pva_ops_executable_register))); + err = pva_kmd_verify_exectuable_hash( + ctx->pva, + (uint8_t *)(pva_offset_const_ptr( + input_buffer, + sizeof(struct pva_ops_executable_register))), + args->exec_size); if (err != PVA_SUCCESS) { goto err_out; } err = pva_kmd_add_vpu_bin_resource(&ctx->ctx_resource_table, exec_data, - args->size, &resource_id); + args->exec_size, &resource_id); if (err == PVA_SUCCESS) { rec = pva_kmd_use_resource(&ctx->ctx_resource_table, resource_id); @@ -209,55 +218,40 @@ static enum pva_error pva_kmd_op_executable_register_async( out_args.error = PVA_SUCCESS; out_args.resource_id = resource_id; out_args.num_symbols = num_symbols; - write_data(out_buffer, &out_args, sizeof(out_args)); - return err; + produce_data(out_buffer, &out_args, sizeof(out_args)); + return PVA_SUCCESS; drop_resource: pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); err_out: out_args.error = err; - write_data(out_buffer, &out_args, sizeof(out_args)); - return err; + produce_data(out_buffer, &out_args, sizeof(out_args)); + return PVA_SUCCESS; } -static enum pva_error -pva_kmd_op_dma_register_async(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, - struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +static enum pva_error pva_kmd_op_dma_register_async( + struct pva_kmd_context *ctx, 
const void *input_buffer, + uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) { enum pva_error err = PVA_SUCCESS; - struct pva_kmd_dma_config_register_in_args *args; - struct pva_kmd_register_out_args out_args = { 0 }; + const struct pva_ops_dma_config_register *args; + struct pva_ops_response_register out_args = { 0 }; struct pva_cmd_update_resource_table *update_cmd; struct pva_resource_entry entry = { 0 }; - void *dma_cfg_data; - uint32_t dma_cfg_payload_size; uint32_t resource_id = 0; - uint32_t dma_config_size = 0; - if (!access_ok(out_buffer, sizeof(struct pva_kmd_register_out_args))) { + if (!access_ok(out_buffer, sizeof(struct pva_ops_response_register))) { return PVA_INVAL; } - if (!access_ok(in_buffer, - sizeof(struct pva_kmd_dma_config_register_in_args))) { + if (input_buffer_size < sizeof(struct pva_ops_dma_config_register)) { + pva_kmd_log_err("DMA ops size too small"); return PVA_INVAL; } - args = read_data(in_buffer, - sizeof(struct pva_kmd_dma_config_register_in_args)); - - dma_cfg_data = &args->dma_config_header; - dma_cfg_payload_size = in_buffer->size - in_buffer->offset; - // Discard the data we are about to pass to pva_kmd_add_dma_config_resource - read_data(in_buffer, dma_cfg_payload_size); - - dma_config_size = - safe_addu32(dma_cfg_payload_size, - (uint32_t)sizeof(args->dma_config_header)); - err = pva_kmd_add_dma_config_resource(&ctx->ctx_resource_table, - dma_cfg_data, dma_config_size, - &resource_id); + args = (const struct pva_ops_dma_config_register *)input_buffer; + err = pva_kmd_add_dma_config_resource(&ctx->ctx_resource_table, args, + input_buffer_size, &resource_id); if (err != PVA_SUCCESS) { goto err_out; } @@ -278,34 +272,38 @@ pva_kmd_op_dma_register_async(struct pva_kmd_context *ctx, out_args.error = PVA_SUCCESS; out_args.resource_id = resource_id; - write_data(out_buffer, &out_args, sizeof(out_args)); + produce_data(out_buffer, &out_args, sizeof(out_args)); return PVA_SUCCESS; drop_dma_config: pva_kmd_drop_resource(&ctx->ctx_resource_table, resource_id); err_out: out_args.error = err; - write_data(out_buffer, &out_args, sizeof(out_args)); + produce_data(out_buffer, &out_args, sizeof(out_args)); /* Error is reported in the output buffer. So we return success here. 
*/ return PVA_SUCCESS; } -static enum pva_error -pva_kmd_op_unregister_async(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, - struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +static enum pva_error pva_kmd_op_unregister_async( + struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) { enum pva_error err = PVA_SUCCESS; - struct pva_kmd_unregister_in_args *args; + const struct pva_ops_unregister *args; struct pva_cmd_unregister_resource *unreg_cmd; - if (!access_ok(in_buffer, sizeof(struct pva_kmd_unregister_in_args))) { - err = PVA_INVAL; - goto err_out; + if (input_buffer_size != sizeof(struct pva_ops_unregister)) { + pva_kmd_log_err("Unregister size is not correct"); + return PVA_INVAL; } - args = read_data(in_buffer, sizeof(struct pva_kmd_unregister_in_args)); + if (!access_ok(out_buffer, + sizeof(struct pva_ops_response_unregister))) { + return PVA_INVAL; + } + + args = (const struct pva_ops_unregister *)input_buffer; unreg_cmd = pva_kmd_reserve_cmd_space(cmdbuf_builder, sizeof(*unreg_cmd)); @@ -325,11 +323,12 @@ err_out: static enum pva_error pva_kmd_async_ops_handler( struct pva_kmd_context *ctx, struct pva_fw_postfence *post_fence, - struct pva_kmd_buffer *in_arg, struct pva_kmd_buffer *out_arg) + struct pva_kmd_ops_buffer *in_arg, struct pva_kmd_ops_buffer *out_arg) { struct pva_kmd_cmdbuf_builder cmdbuf_builder; enum pva_error err = PVA_SUCCESS; uint32_t wait_time = 0; + enum pva_error submit_error = PVA_SUCCESS; //first check if we have space in queue while (pva_kmd_queue_space(&ctx->ctx_queue) == 0) { @@ -346,32 +345,47 @@ static enum pva_error pva_kmd_async_ops_handler( goto out; } - while (access_ok(in_arg, sizeof(struct pva_kmd_op_header))) { - struct pva_kmd_op_header *header = - read_data(in_arg, sizeof(struct pva_kmd_op_header)); + while (access_ok(in_arg, sizeof(struct pva_ops_header))) { + const struct pva_ops_header *header = peek_data(in_arg); + const void *input_buffer; - if (header->op_type >= PVA_KMD_OP_MAX) { + if (!access_ok(in_arg, header->size)) { + pva_kmd_log_err( + "Ops header size is bigger than buffer"); err = PVA_INVAL; goto out; } - switch (header->op_type) { - case PVA_KMD_OP_MEMORY_REGISTER: + input_buffer = consume_data(in_arg, header->size); + if (header->size % sizeof(uint64_t) != 0) { + pva_kmd_log_err( + "PVA operation size is not a multiple of 8"); + err = PVA_INVAL; + goto exit_loop; + } + + switch (header->opcode) { + case PVA_OPS_OPCODE_MEMORY_REGISTER: err = pva_kmd_op_memory_register_async( - ctx, in_arg, out_arg, &cmdbuf_builder); + ctx, input_buffer, header->size, out_arg, + &cmdbuf_builder); break; - case PVA_KMD_OP_EXECUTABLE_REGISTER: + case PVA_OPS_OPCODE_EXECUTABLE_REGISTER: err = pva_kmd_op_executable_register_async( - ctx, in_arg, out_arg, &cmdbuf_builder); + ctx, input_buffer, header->size, out_arg, + &cmdbuf_builder); break; - case PVA_KMD_OP_DMA_CONFIG_REGISTER: - err = pva_kmd_op_dma_register_async( - ctx, in_arg, out_arg, &cmdbuf_builder); + case PVA_OPS_OPCODE_DMA_CONFIG_REGISTER: + err = pva_kmd_op_dma_register_async(ctx, input_buffer, + header->size, + out_arg, + &cmdbuf_builder); break; - case PVA_KMD_OP_UNREGISTER: - err = pva_kmd_op_unregister_async(ctx, in_arg, out_arg, + case PVA_OPS_OPCODE_UNREGISTER: + err = pva_kmd_op_unregister_async(ctx, input_buffer, + header->size, out_arg, &cmdbuf_builder); break; @@ -385,51 +399,51 @@ static enum pva_error 
pva_kmd_async_ops_handler( } } +exit_loop: /* This fence comes from user, so set the flag to inform FW */ post_fence->flags |= PVA_FW_POSTFENCE_FLAGS_USER_FENCE; - err = pva_kmd_submitter_submit_with_fence(&ctx->submitter, - &cmdbuf_builder, post_fence); - ASSERT(err == PVA_SUCCESS); + submit_error = pva_kmd_submitter_submit_with_fence( + &ctx->submitter, &cmdbuf_builder, post_fence); + ASSERT(submit_error == PVA_SUCCESS); out: return err; } -static enum pva_error pva_kmd_op_context_init(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer) +static enum pva_error +pva_kmd_op_context_init(struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, + struct pva_kmd_ops_buffer *out_buffer) { - struct pva_kmd_context_init_in_args *ctx_init_args; - struct pva_kmd_context_init_out_args ctx_init_out = { 0 }; + const struct pva_ops_context_init *ctx_init_args; + struct pva_ops_response_context_init ctx_init_out = { 0 }; enum pva_error err; - if (!access_ok(in_buffer, - sizeof(struct pva_kmd_context_init_in_args))) { + if (input_buffer_size != sizeof(struct pva_ops_context_init)) { + pva_kmd_log_err("Context init size is not correct"); return PVA_INVAL; } if (!access_ok(out_buffer, - sizeof(struct pva_kmd_context_init_out_args))) { + sizeof(struct pva_ops_response_context_init))) { return PVA_INVAL; } - ctx_init_args = read_data(in_buffer, - sizeof(struct pva_kmd_context_init_in_args)); + ctx_init_args = (const struct pva_ops_context_init *)input_buffer; err = pva_kmd_context_init(ctx, ctx_init_args->resource_table_capacity); ctx_init_out.error = err; ctx_init_out.ccq_shm_hdl = (uint64_t)ctx->ccq_shm_handle; - write_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out)); + produce_data(out_buffer, &ctx_init_out, sizeof(ctx_init_out)); - return err; + return PVA_SUCCESS; } -static enum pva_error -pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, - struct pva_kmd_cmdbuf_builder *cmdbuf_builder) +static enum pva_error pva_kmd_op_syncpt_register_async( + struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer, + struct pva_kmd_cmdbuf_builder *cmdbuf_builder) { enum pva_error err; struct pva_syncpt_rw_info *syncpts; @@ -437,7 +451,17 @@ pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx, uint32_t resource_id = 0; struct pva_cmd_update_resource_table *update_cmd; struct pva_resource_entry entry = { 0 }; - struct pva_kmd_syncpt_register_out_args syncpt_register_out = { 0 }; + struct pva_ops_response_syncpt_register syncpt_register_out = { 0 }; + + if (input_buffer_size != sizeof(struct pva_ops_syncpt_register)) { + pva_kmd_log_err("Syncpt register size is not correct"); + return PVA_INVAL; + } + + if (!access_ok(out_buffer, + sizeof(struct pva_ops_response_syncpt_register))) { + return PVA_INVAL; + } /* Register RO syncpts */ dev_mem.iova = ctx->pva->syncpt_ro_iova; @@ -496,108 +520,109 @@ pva_kmd_op_syncpt_register_async(struct pva_kmd_context *ctx, err_out: syncpt_register_out.error = err; - write_data(out_buffer, &syncpt_register_out, - sizeof(syncpt_register_out)); - return err; -} - -static enum pva_error pva_kmd_op_queue_create(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_arg, - struct pva_kmd_buffer *out_arg) -{ - struct pva_kmd_queue_create_in_args *queue_create_args; - struct pva_kmd_queue_create_out_args queue_out_args = { 0 }; - uint32_t queue_id = 
PVA_INVALID_QUEUE_ID; - enum pva_error err = PVA_SUCCESS; - - if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_create_in_args))) { - return PVA_INVAL; - } - - if (!access_ok(out_arg, sizeof(struct pva_kmd_queue_create_out_args))) { - return PVA_INVAL; - } - - queue_create_args = - read_data(in_arg, sizeof(struct pva_kmd_queue_create_in_args)); - queue_out_args.error = - pva_kmd_queue_create(ctx, queue_create_args, &queue_id); - if (queue_out_args.error == PVA_SUCCESS) { - queue_out_args.queue_id = queue_id; - } - - if (queue_id >= PVA_MAX_NUM_QUEUES_PER_CONTEXT) { - pva_kmd_log_err("pva_kmd_op_queue_create invalid queue id"); - err = PVA_INVAL; - goto err_out; - } - - pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id], - &queue_out_args.syncpt_fence_counter); - - write_data(out_arg, &queue_out_args, - sizeof(struct pva_kmd_queue_create_out_args)); - -err_out: - return err; -} - -static enum pva_error pva_kmd_op_queue_destroy(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_arg, - struct pva_kmd_buffer *out_arg) -{ - struct pva_kmd_queue_destroy_in_args *queue_destroy_args; - struct pva_kmd_queue_destroy_out_args queue_out_args = { 0 }; - - if (!access_ok(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args))) { - return PVA_INVAL; - } - - if (!access_ok(out_arg, - sizeof(struct pva_kmd_queue_destroy_out_args))) { - return PVA_INVAL; - } - - queue_destroy_args = - read_data(in_arg, sizeof(struct pva_kmd_queue_destroy_in_args)); - queue_out_args.error = pva_kmd_queue_destroy(ctx, queue_destroy_args); - - write_data(out_arg, &queue_out_args, - sizeof(struct pva_kmd_queue_destroy_out_args)); - + produce_data(out_buffer, &syncpt_register_out, + sizeof(syncpt_register_out)); return PVA_SUCCESS; } static enum pva_error -pva_kmd_op_executable_get_symbols(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_arg, - struct pva_kmd_buffer *out_arg) +pva_kmd_op_queue_create(struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, + struct pva_kmd_ops_buffer *out_buffer) { - struct pva_kmd_executable_get_symbols_in_args *sym_in_args; - struct pva_kmd_executable_get_symbols_out_args sym_out_args = { 0 }; + const struct pva_ops_queue_create *queue_create_args; + struct pva_ops_response_queue_create queue_out_args = { 0 }; + uint32_t queue_id = PVA_INVALID_QUEUE_ID; + enum pva_error err = PVA_SUCCESS; + + if (input_buffer_size != sizeof(struct pva_ops_queue_create)) { + pva_kmd_log_err("Queue create size is not correct"); + return PVA_INVAL; + } + + if (!access_ok(out_buffer, + sizeof(struct pva_ops_response_queue_create))) { + return PVA_INVAL; + } + + queue_create_args = (const struct pva_ops_queue_create *)input_buffer; + err = pva_kmd_queue_create(ctx, queue_create_args, &queue_id); + + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to create queue"); + goto out; + } + + queue_out_args.error = err; + queue_out_args.queue_id = queue_id; + pva_kmd_read_syncpt_val(ctx->pva, ctx->syncpt_ids[queue_id], + &queue_out_args.syncpt_fence_counter); + +out: + produce_data(out_buffer, &queue_out_args, + sizeof(struct pva_ops_response_queue_create)); + return PVA_SUCCESS; +} + +static enum pva_error +pva_kmd_op_queue_destroy(struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, + struct pva_kmd_ops_buffer *out_buffer) +{ + const struct pva_ops_queue_destroy *queue_destroy_args; + struct pva_ops_response_queue_destroy queue_out_args = { 0 }; + + if (input_buffer_size != sizeof(struct pva_ops_queue_destroy)) { + 
pva_kmd_log_err("Queue destroy size is not correct"); + return PVA_INVAL; + } + + if (!access_ok(out_buffer, + sizeof(struct pva_ops_response_queue_destroy))) { + return PVA_INVAL; + } + + queue_destroy_args = (const struct pva_ops_queue_destroy *)input_buffer; + queue_out_args.error = + pva_kmd_queue_destroy(ctx, queue_destroy_args->queue_id); + + produce_data(out_buffer, &queue_out_args, + sizeof(struct pva_ops_response_queue_destroy)); + + return PVA_SUCCESS; +} + +static enum pva_error pva_kmd_op_executable_get_symbols( + struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer) +{ + const struct pva_ops_executable_get_symbols *sym_in_args; + struct pva_ops_response_executable_get_symbols sym_out_args = { 0 }; struct pva_kmd_resource_record *rec; enum pva_error err = PVA_SUCCESS; uint32_t table_size = 0; uint32_t size = 0; - if (!access_ok(in_arg, - sizeof(struct pva_kmd_executable_get_symbols_in_args))) { + if (input_buffer_size != + sizeof(struct pva_ops_executable_get_symbols)) { + pva_kmd_log_err("Executable get symbols size is not correct"); return PVA_INVAL; } - if (!access_ok(out_arg, - sizeof(struct pva_kmd_executable_get_symbols_out_args))) { + if (!access_ok(out_buffer, + sizeof(struct pva_ops_response_executable_get_symbols))) { return PVA_INVAL; } - sym_in_args = read_data( - in_arg, sizeof(struct pva_kmd_executable_get_symbols_in_args)); + sym_in_args = + (const struct pva_ops_executable_get_symbols *)input_buffer; + rec = pva_kmd_use_resource(&ctx->ctx_resource_table, sym_in_args->exec_resource_id); if (rec == NULL) { err = PVA_INVAL; - pva_kmd_log_err("pva_kmd_use_resource failed"); - goto err_out; + pva_kmd_log_err("Invalid resource ID"); + goto err_response; } if (rec->type != PVA_RESOURCE_TYPE_EXEC_BIN) { err = PVA_INVAL; @@ -609,41 +634,40 @@ pva_kmd_op_executable_get_symbols(struct pva_kmd_context *ctx, sizeof(struct pva_symbol_info)); size = safe_addu32( table_size, - sizeof(struct pva_kmd_executable_get_symbols_out_args)); - if (!access_ok(out_arg, size)) { + sizeof(struct pva_ops_response_executable_get_symbols)); + if (!access_ok(out_buffer, size)) { err = PVA_INVAL; goto err_drop; } - sym_out_args.error = err; + sym_out_args.error = PVA_SUCCESS; sym_out_args.num_symbols = rec->vpu_bin.symbol_table.n_symbols; - write_data(out_arg, &sym_out_args, sizeof(sym_out_args)); - write_data(out_arg, rec->vpu_bin.symbol_table.symbols, table_size); - + produce_data(out_buffer, &sym_out_args, sizeof(sym_out_args)); + produce_data(out_buffer, rec->vpu_bin.symbol_table.symbols, table_size); pva_kmd_drop_resource(&ctx->ctx_resource_table, sym_in_args->exec_resource_id); - return PVA_SUCCESS; err_drop: pva_kmd_drop_resource(&ctx->ctx_resource_table, sym_in_args->exec_resource_id); -err_out: +err_response: sym_out_args.error = err; - write_data(out_arg, &sym_out_args, sizeof(sym_out_args)); - return err; + sym_out_args.num_symbols = 0; + produce_data(out_buffer, &sym_out_args, sizeof(sym_out_args)); + return PVA_SUCCESS; } typedef enum pva_error (*pva_kmd_async_op_func_t)( - struct pva_kmd_context *ctx, struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, + struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, struct pva_kmd_ops_buffer *out_buffer, struct pva_kmd_cmdbuf_builder *cmdbuf_builder); static enum pva_error -pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_buffer, - struct pva_kmd_buffer *out_buffer, 
+pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, const void *input_buffer, + uint32_t input_buffer_size, + struct pva_kmd_ops_buffer *out_buffer, pva_kmd_async_op_func_t async_op_func) { enum pva_error err = PVA_SUCCESS; @@ -655,7 +679,8 @@ pva_kmd_op_synced_submit(struct pva_kmd_context *ctx, goto err_out; } - err = async_op_func(ctx, in_buffer, out_buffer, &cmdbuf_builder); + err = async_op_func(ctx, input_buffer, input_buffer_size, out_buffer, + &cmdbuf_builder); if (err != PVA_SUCCESS) { goto cancel_submit; } @@ -680,58 +705,77 @@ err_out: return err; } -static enum pva_error pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx, - struct pva_kmd_buffer *in_arg, - struct pva_kmd_buffer *out_arg) +static enum pva_error +pva_kmd_sync_ops_handler(struct pva_kmd_context *ctx, + struct pva_kmd_ops_buffer *in_arg, + struct pva_kmd_ops_buffer *out_arg) { enum pva_error err = PVA_SUCCESS; - struct pva_kmd_op_header *header; + const struct pva_ops_header *header; + const void *input_buffer; + uint32_t input_buffer_size; - if (ctx->pva->recovery) { - pva_kmd_log_err("In Recovery state, do not accept ops"); + if (!access_ok(in_arg, sizeof(struct pva_ops_header))) { err = PVA_INVAL; goto out; } - if (!access_ok(in_arg, sizeof(struct pva_kmd_op_header))) { + header = peek_data(in_arg); + + if (!access_ok(in_arg, header->size)) { err = PVA_INVAL; goto out; } - header = read_data(in_arg, sizeof(struct pva_kmd_op_header)); + input_buffer = consume_data(in_arg, header->size); + input_buffer_size = header->size; - switch (header->op_type) { - case PVA_KMD_OP_CONTEXT_INIT: - err = pva_kmd_op_context_init(ctx, in_arg, out_arg); + if (input_buffer_size % sizeof(uint64_t) != 0) { + pva_kmd_log_err("PVA operation size is not a multiple of 8"); + err = PVA_INVAL; + goto out; + } + + switch (header->opcode) { + case PVA_OPS_OPCODE_CONTEXT_INIT: + err = pva_kmd_op_context_init(ctx, input_buffer, + input_buffer_size, out_arg); break; - case PVA_KMD_OP_QUEUE_CREATE: - err = pva_kmd_op_queue_create(ctx, in_arg, out_arg); + case PVA_OPS_OPCODE_QUEUE_CREATE: + err = pva_kmd_op_queue_create(ctx, input_buffer, + input_buffer_size, out_arg); break; - case PVA_KMD_OP_QUEUE_DESTROY: - err = pva_kmd_op_queue_destroy(ctx, in_arg, out_arg); + case PVA_OPS_OPCODE_QUEUE_DESTROY: + err = pva_kmd_op_queue_destroy(ctx, input_buffer, + input_buffer_size, out_arg); break; - case PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS: - err = pva_kmd_op_executable_get_symbols(ctx, in_arg, out_arg); + case PVA_OPS_OPCODE_EXECUTABLE_GET_SYMBOLS: + err = pva_kmd_op_executable_get_symbols( + ctx, input_buffer, input_buffer_size, out_arg); break; - case PVA_KMD_OP_MEMORY_REGISTER: + case PVA_OPS_OPCODE_MEMORY_REGISTER: err = pva_kmd_op_synced_submit( - ctx, in_arg, out_arg, pva_kmd_op_memory_register_async); + ctx, input_buffer, input_buffer_size, out_arg, + pva_kmd_op_memory_register_async); break; - case PVA_KMD_OP_SYNPT_REGISTER: + case PVA_OPS_OPCODE_SYNCPT_REGISTER: err = pva_kmd_op_synced_submit( - ctx, in_arg, out_arg, pva_kmd_op_syncpt_register_async); + ctx, input_buffer, input_buffer_size, out_arg, + pva_kmd_op_syncpt_register_async); break; - case PVA_KMD_OP_EXECUTABLE_REGISTER: + case PVA_OPS_OPCODE_EXECUTABLE_REGISTER: err = pva_kmd_op_synced_submit( - ctx, in_arg, out_arg, + ctx, input_buffer, input_buffer_size, out_arg, pva_kmd_op_executable_register_async); break; - case PVA_KMD_OP_DMA_CONFIG_REGISTER: - err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg, + case PVA_OPS_OPCODE_DMA_CONFIG_REGISTER: + err = pva_kmd_op_synced_submit(ctx, 
input_buffer, + input_buffer_size, out_arg, + pva_kmd_op_dma_register_async); break; - case PVA_KMD_OP_UNREGISTER: - err = pva_kmd_op_synced_submit(ctx, in_arg, out_arg, + case PVA_OPS_OPCODE_UNREGISTER: + err = pva_kmd_op_synced_submit(ctx, input_buffer, + input_buffer_size, out_arg, pva_kmd_op_unregister_async); break; default: @@ -744,38 +788,28 @@ out: } enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, + enum pva_ops_submit_mode mode, + struct pva_fw_postfence *postfence, void const *ops_buffer, uint32_t ops_size, - void *response, - uint32_t response_buffer_size, + void *resp_buffer, uint32_t resp_buffer_size, uint32_t *out_response_size) + { - struct pva_kmd_operations *ops; - struct pva_kmd_buffer in_buffer = { 0 }, out_buffer = { 0 }; + struct pva_kmd_ops_buffer in_buffer = { 0 }, out_buffer = { 0 }; enum pva_error err = PVA_SUCCESS; - struct pva_kmd_response_header *resp_hdr; + + if (ctx->pva->recovery) { + pva_kmd_log_err("PVA firmware aborted. No KMD ops allowed."); + return PVA_ERR_FW_ABORTED; + } in_buffer.base = ops_buffer; in_buffer.size = ops_size; - out_buffer.base = response; - out_buffer.size = response_buffer_size; + out_buffer.base = resp_buffer; + out_buffer.size = resp_buffer_size; - if (!access_ok(&in_buffer, sizeof(struct pva_kmd_operations))) { - err = PVA_INVAL; - goto out; - } - - if (!access_ok(&out_buffer, sizeof(struct pva_kmd_response_header))) { - err = PVA_INVAL; - goto out; - } - - resp_hdr = - read_data(&out_buffer, sizeof(struct pva_kmd_response_header)); - - ops = read_data(&in_buffer, sizeof(struct pva_kmd_operations)); - - if (ops->mode == PVA_KMD_OPS_MODE_SYNC) { + if (mode == PVA_OPS_SUBMIT_MODE_SYNC) { /* Process one sync operation */ err = pva_kmd_sync_ops_handler(ctx, &in_buffer, &out_buffer); @@ -786,13 +820,10 @@ enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, * - DMA configuration registration * - unregister */ - err = pva_kmd_async_ops_handler(ctx, &ops->postfence, - &in_buffer, &out_buffer); + err = pva_kmd_async_ops_handler(ctx, postfence, &in_buffer, + &out_buffer); } - //Update the size of the responses in the response header. - // This size also include the header size. - resp_hdr->rep_size = out_buffer.offset; -out: + *out_response_size = out_buffer.offset; return err; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h index 6c4d1544..c730b8f3 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_op_handler.h @@ -27,10 +27,13 @@ * response will be written. * * @return pva_error indicating the success or failure of the operation. +* +* @note The input and output buffers must never alias. 
*/ -enum pva_error pva_kmd_ops_handler(struct pva_kmd_context *ctx, void const *ops, - uint32_t ops_size, void *response, - uint32_t response_buffer_size, - uint32_t *out_response_size); +enum pva_error +pva_kmd_ops_handler(struct pva_kmd_context *ctx, enum pva_ops_submit_mode mode, + struct pva_fw_postfence *postfence, void const *ops_buffer, + uint32_t ops_size, void *response, + uint32_t response_buffer_size, uint32_t *out_response_size); #endif // PVA_KMD_OP_HANDLER_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c index 676d909a..d579acb5 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +#include "pva_kmd.h" #include "pva_kmd_utils.h" #include "pva_fw.h" #include "pva_kmd_device_memory.h" @@ -119,10 +120,9 @@ end: return err; } -enum pva_error -pva_kmd_queue_create(struct pva_kmd_context *ctx, - struct pva_kmd_queue_create_in_args *in_args, - uint32_t *queue_id) +enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx, + const struct pva_ops_queue_create *in_args, + uint32_t *queue_id) { struct pva_kmd_device_memory *submission_mem_kmd = NULL; struct pva_kmd_queue *queue = NULL; @@ -205,9 +205,8 @@ err_out: return err; } -enum pva_error -pva_kmd_queue_destroy(struct pva_kmd_context *ctx, - struct pva_kmd_queue_destroy_in_args *in_args) +enum pva_error pva_kmd_queue_destroy(struct pva_kmd_context *ctx, + uint32_t queue_id) { struct pva_kmd_queue *queue; enum pva_error err = PVA_SUCCESS; @@ -218,8 +217,7 @@ pva_kmd_queue_destroy(struct pva_kmd_context *ctx, * This call needs to be added after syncpoint and ccq functions are ready. 
*/ pva_kmd_mutex_lock(&ctx->queue_allocator.allocator_lock); - queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, - in_args->queue_id); + queue = pva_kmd_get_block_unsafe(&ctx->queue_allocator, queue_id); if (queue == NULL) { pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock); return PVA_INVAL; @@ -240,7 +238,7 @@ pva_kmd_queue_destroy(struct pva_kmd_context *ctx, pva_kmd_queue_deinit(queue); pva_kmd_mutex_unlock(&ctx->queue_allocator.allocator_lock); - err = pva_kmd_free_block(&ctx->queue_allocator, in_args->queue_id); + err = pva_kmd_free_block(&ctx->queue_allocator, queue_id); ASSERT(err == PVA_SUCCESS); return PVA_SUCCESS; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h index 470ec3f7..0861a557 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_queue.h @@ -22,14 +22,12 @@ void pva_kmd_queue_init(struct pva_kmd_queue *queue, struct pva_kmd_device *pva, pva_kmd_mutex_t *ccq_lock, struct pva_kmd_device_memory *queue_memory, uint32_t max_num_submit); -enum pva_error -pva_kmd_queue_create(struct pva_kmd_context *ctx, - struct pva_kmd_queue_create_in_args *in_args, - uint32_t *queue_id); +enum pva_error pva_kmd_queue_create(struct pva_kmd_context *ctx, + const struct pva_ops_queue_create *in_args, + uint32_t *queue_id); -enum pva_error -pva_kmd_queue_destroy(struct pva_kmd_context *ctx, - struct pva_kmd_queue_destroy_in_args *in_args); +enum pva_error pva_kmd_queue_destroy(struct pva_kmd_context *ctx, + uint32_t queue_id); enum pva_error pva_kmd_queue_submit(struct pva_kmd_queue *queue, diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c index adb78519..83d97010 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.c @@ -343,6 +343,7 @@ void pva_kmd_drop_resource_unsafe(struct pva_kmd_resource_table *resource_table, &resource_table->resource_record_allocator, resource_id); if (rec == NULL) { + pva_kmd_log_err_u64("Unexpected resource ID drop", resource_id); return; } @@ -354,7 +355,7 @@ void pva_kmd_drop_resource_unsafe(struct pva_kmd_resource_table *resource_table, enum pva_error pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table, - void *executable, uint32_t executable_size, + const void *executable, uint32_t executable_size, uint32_t *out_resource_id) { uint32_t res_id; @@ -441,7 +442,8 @@ pva_kmd_make_resource_entry(struct pva_kmd_resource_table *resource_table, } enum pva_error pva_kmd_add_dma_config_resource( - struct pva_kmd_resource_table *resource_table, void *dma_config_payload, + struct pva_kmd_resource_table *resource_table, + const struct pva_ops_dma_config_register *dma_cfg_hdr, uint32_t dma_config_size, uint32_t *out_resource_id) { enum pva_error err = PVA_SUCCESS; @@ -465,7 +467,7 @@ enum pva_error pva_kmd_add_dma_config_resource( dma_aux = &resource_table->dma_aux[block_idx]; pva_kmd_mutex_lock(&resource_table->resource_table_lock); - err = pva_kmd_load_dma_config(resource_table, dma_config_payload, + err = pva_kmd_load_dma_config(resource_table, dma_cfg_hdr, dma_config_size, dma_aux, fw_dma_cfg, &fw_fetch_size); pva_kmd_mutex_unlock(&resource_table->resource_table_lock); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h 
b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h index 0df42831..957257a3 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_resource_table.h @@ -2,6 +2,7 @@ /* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #ifndef PVA_KMD_RESOURCE_TABLE_H #define PVA_KMD_RESOURCE_TABLE_H +#include "pva_api_ops.h" #include "pva_fw.h" #include "pva_bit.h" #include "pva_resource.h" @@ -108,13 +109,13 @@ pva_kmd_add_dram_buffer_resource(struct pva_kmd_resource_table *resource_table, enum pva_error pva_kmd_add_vpu_bin_resource(struct pva_kmd_resource_table *resource_table, - void *executable, uint32_t executable_size, + const void *executable, uint32_t executable_size, uint32_t *out_resource_id); -enum pva_error -pva_kmd_add_dma_config_resource(struct pva_kmd_resource_table *resource_table, - void *dma_config, uint32_t dma_config_size, - uint32_t *out_resource_id); +enum pva_error pva_kmd_add_dma_config_resource( + struct pva_kmd_resource_table *resource_table, + const struct pva_ops_dma_config_register *dma_cfg_hdr, + uint32_t dma_config_size, uint32_t *out_resource_id); /** * Increment reference count of the resources diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c index 2382d233..366112f0 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.c @@ -3,6 +3,8 @@ #include "pva_kmd_abort.h" #include "pva_kmd_device.h" +#include "pva_kmd_context.h" +#include "pva_kmd_shim_trace_event.h" #include "pva_kmd_shared_buffer.h" static void @@ -81,10 +83,12 @@ err_out: return err; } -enum pva_error pva_kmd_shared_buffer_init( - struct pva_kmd_device *pva, uint8_t interface, uint32_t element_size, - uint32_t num_entries, shared_buffer_process_element_cb process_cb, - shared_buffer_lock_cb lock_cb, shared_buffer_lock_cb unlock_cb) +enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva, + uint8_t interface, + uint32_t element_size, + uint32_t num_entries, + shared_buffer_lock_cb lock_cb, + shared_buffer_lock_cb unlock_cb) { enum pva_error err = PVA_SUCCESS; @@ -114,7 +118,6 @@ enum pva_error pva_kmd_shared_buffer_init( buffer->header->tail = 0U; buffer->body = (pva_offset_pointer(buffer->header, sizeof(*buffer->header))); - buffer->process_cb = process_cb; buffer->lock_cb = lock_cb; buffer->unlock_cb = unlock_cb; buffer->resource_offset = 0U; @@ -167,6 +170,69 @@ enum pva_error pva_kmd_shared_buffer_deinit(struct pva_kmd_device *pva, return err; } +static void shared_buffer_process_msg(struct pva_kmd_device *pva, + uint8_t interface, void *msg) +{ + enum pva_error err = PVA_SUCCESS; + struct pva_kmd_fw_buffer_msg_header header; + struct pva_kmd_fw_msg_vpu_trace vpu_trace; + struct pva_kmd_fw_msg_res_unreg unreg_data; + struct pva_kmd_context *ctx = NULL; + void *msg_body; + + ASSERT(msg != NULL); + + // Copy the header + memcpy(&header, msg, sizeof(header)); + uint32_t msg_size = safe_subu32(header.size, sizeof(header)); + msg_body = (uint8_t *)msg + sizeof(header); + + switch (header.type) { + case PVA_KMD_FW_BUF_MSG_TYPE_FW_EVENT: { + // TODO: This must be updated once profiler config is exposed through debugfs. + // KMD must use the same timestamp size as the FW. 
It is possible that the user + // changes the timestamp size through debugfs after FW logged the event. + // FW must log the type of timestamp it used to capture the event. + ASSERT(msg_size == + sizeof(struct pva_fw_event_message) + + pva->debugfs_context.g_fw_profiling_config + .timestamp_size); + + err = pva_kmd_process_fw_event(pva, msg_body, msg_size); + if (err != PVA_SUCCESS) { + pva_kmd_log_err("Failed to process FW event"); + } + break; + } + case PVA_KMD_FW_BUF_MSG_TYPE_VPU_TRACE: { + ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_vpu_trace)); + memcpy(&vpu_trace, msg_body, sizeof(vpu_trace)); + // We do not check the profiling level here. FW checks profiling level while logging + // the trace event. If the profiling level was high enough for FW to log the event, + // KMD should trace it. The profiling level might have changed since FW logged the event. + pva_kmd_shim_add_trace_vpu_exec(pva, &vpu_trace); + break; + } + case PVA_KMD_FW_BUF_MSG_TYPE_RES_UNREG: { + ASSERT(msg_size == sizeof(struct pva_kmd_fw_msg_res_unreg)); + memcpy(&unreg_data, msg_body, sizeof(unreg_data)); + ctx = pva_kmd_get_context(pva, interface); + + ASSERT(ctx != NULL); + + // We do not lock the resource table here because this function is intended + // to be called from the shared buffer processing function which should acquire + // the required lock. + pva_kmd_drop_resource_unsafe(&ctx->ctx_resource_table, + unreg_data.resource_id); + break; + } + default: + FAULT("Unexpected message type while processing shared buffer"); + break; + } +} + void pva_kmd_shared_buffer_process(void *pva_dev, uint8_t interface) { struct pva_kmd_device *pva = (struct pva_kmd_device *)pva_dev; @@ -236,7 +302,7 @@ void pva_kmd_shared_buffer_process(void *pva_dev, uint8_t interface) current_element = (void *)&buffer_body[*buffer_head]; // Call the user-provided callback with the current element and context - fw_buffer->process_cb(pva, interface, current_element); + shared_buffer_process_msg(pva, interface, current_element); // Advance the head pointer in a circular buffer fashion *buffer_head = (*buffer_head + element_size) % buffer_size; diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.h index 024815ca..9e9fb48d 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_shared_buffer.h @@ -30,10 +30,12 @@ struct pva_kmd_shared_buffer { uint32_t resource_offset; }; -enum pva_error pva_kmd_shared_buffer_init( - struct pva_kmd_device *pva, uint8_t interface, uint32_t element_size, - uint32_t buffer_size, shared_buffer_process_element_cb process_cb, - shared_buffer_lock_cb lock_cb, shared_buffer_lock_cb unlock_cb); +enum pva_error pva_kmd_shared_buffer_init(struct pva_kmd_device *pva, + uint8_t interface, + uint32_t element_size, + uint32_t buffer_size, + shared_buffer_lock_cb lock_cb, + shared_buffer_lock_cb unlock_cb); enum pva_error pva_kmd_shared_buffer_deinit(struct pva_kmd_device *pva, uint8_t interface); diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c index 74365d2c..1fca1632 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_boot.c @@ -81,29 +81,23 @@ void pva_kmd_config_evp_seg_regs(struct pva_kmd_device *pva) * segment registers accordingly * * */ - if 
(pva->load_from_gsc) {
- if (pva->is_hv_mode) {
- /* Loading from GSC with HV (i.e AV+L or AV+Q case).
- * This will be trapped by HV
- */
- pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
- 0xFFFFFFFFU);
- pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
- 0xFFFFFFFFU);
- } else {
- /* Loading from GSC without HV i.e L4T case.
- * TODO: Program Segment regsites using the GSC Careveout
- * fetched from DT file. Till then, ASSERT here.
- */
- ASSERT(false);
- }
- } else {
- /* Loading from file.
- * In HV case, traps should be bypassed in HV
+ if (pva->load_from_gsc && pva->is_hv_mode) {
+ /* Loading from GSC with HV (i.e. AV+L or AV+Q case).
+ * This will be trapped by HV */
+ pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
+ 0xFFFFFFFFU);
+ pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
+ 0xFFFFFFFFU);
+ } else {
+ /* underflow is totally OK */
seg_reg_value =
- pva->fw_bin_mem->iova -
- FW_CODE_DATA_START_ADDR; /* underflow is totally OK */
+ pva->load_from_gsc ?
+ pva->fw_carveout.base_va -
+ FW_CODE_DATA_START_ADDR : /* Load from GSC in L4T case */
+ pva->fw_bin_mem->iova -
+ FW_CODE_DATA_START_ADDR; /* Boot from file case */
+
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_lsegreg,
iova_lo(seg_reg_value));
pva_kmd_write(pva, pva->regspec.cfg_priv_ar1_usegreg,
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c
index 8e15bb42..19fdbd64 100644
--- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_silicon_executable.c
@@ -775,7 +775,7 @@ out:
}
enum pva_error
-pva_kmd_load_executable(void *executable_data, uint32_t executable_size,
+pva_kmd_load_executable(const void *executable_data, uint32_t executable_size,
struct pva_kmd_device *pva, uint8_t dma_smmu_id,
struct pva_kmd_exec_symbol_table *out_symbol_table,
struct pva_kmd_device_memory **out_metainfo,
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c
index 60c47489..4e4f6030 100644
--- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_tegra_stats.c
@@ -35,6 +35,26 @@ void pva_kmd_device_deinit_tegra_stats(struct pva_kmd_device *pva)
pva->tegra_stats_resource_id);
}
+static uint64_t calc_vpu_utilization(uint64_t total_utilization,
+ uint64_t duration)
+{
+ if (duration == 0) {
+ return 0;
+ } else {
+ /* tegrastats expects 10000 scale */
+ pva_math_error err = MATH_OP_SUCCESS;
+ uint64_t util =
+ mulu64(10000ULL, total_utilization, &err) / duration;
+
+ if (err != MATH_OP_SUCCESS) {
+ pva_kmd_log_err(
+ "Overflow when computing VPU utilization");
+ }
+
+ return util;
+ }
+}
+
enum pva_error
pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_kmd_tegrastats *kmd_tegra_stats)
@@ -44,22 +64,31 @@ pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva,
struct pva_cmd_get_tegra_stats *cmd;
uint64_t buffer_offset = 0U;
uint32_t fence_val;
- enum pva_error err;
- struct pva_kmd_fw_tegrastats *fw_tegra_stats;
+ enum pva_error err = PVA_SUCCESS;
+ struct pva_kmd_fw_tegrastats fw_tegra_stats = { 0 };
bool stats_enabled = pva->debugfs_context.stats_enable;
uint64_t duration = 0U;
+ if (stats_enabled == false) {
+ pva_kmd_log_err("Tegra stats are disabled");
+ err = PVA_INVAL;
+ goto err_out;
+ }
+
+ if (!pva_kmd_device_maybe_on(pva)) {
+ goto out;
+ }
+
+ /* 
Power on PVA if not already */ err = pva_kmd_device_busy(pva); if (err != PVA_SUCCESS) { pva_kmd_log_err( "pva_kmd_device_busy failed when submitting tegra stats cmd"); - return err; + goto err_out; } err = pva_kmd_submitter_prepare(dev_submitter, &builder); if (err != PVA_SUCCESS) { - goto err_out; + goto dev_idle; } cmd = pva_kmd_reserve_cmd_space(&builder, sizeof(*cmd)); ASSERT(cmd != NULL); @@ -80,54 +109,30 @@ pva_kmd_notify_fw_get_tegra_stats(struct pva_kmd_device *pva, if (err != PVA_SUCCESS) { pva_kmd_log_err( "Waiting for FW timed out when getting tegra stats"); - goto err_out; + goto dev_idle; } - if (stats_enabled == false) - goto err_out; + memcpy(&fw_tegra_stats, pva->tegra_stats_memory->va, + sizeof(fw_tegra_stats)); - fw_tegra_stats = - (struct pva_kmd_fw_tegrastats *)(pva->tegra_stats_memory->va); + pva_kmd_device_idle(pva); - duration = safe_subu64(fw_tegra_stats->window_end_time, - fw_tegra_stats->window_start_time); - if (duration == 0) { - pva_kmd_print_str("VPU Stats: Duration is zero"); - goto err_out; - } +out: + duration = sat_sub64(fw_tegra_stats.window_end_time, + fw_tegra_stats.window_start_time); - pva_kmd_print_str("VPU Stats"); - pva_kmd_print_str_u64("Window Start Time", - fw_tegra_stats->window_start_time); - pva_kmd_print_str_u64("Window End Time", - fw_tegra_stats->window_end_time); - pva_kmd_print_str_u64("Total utilization VPU 0", - fw_tegra_stats->total_utilization[0]); - pva_kmd_print_str_u64("Total utilization VPU 1", - fw_tegra_stats->total_utilization[1]); - pva_kmd_print_str_u64( - "VPU 0 percent utilization", - safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) / - duration); - pva_kmd_print_str_u64( - "VPU 1 percent utilization", - safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) / - duration); - - kmd_tegra_stats->average_vpu_utilization[0] = - safe_mulu64(100ULL, fw_tegra_stats->total_utilization[0]) / - duration; - kmd_tegra_stats->average_vpu_utilization[1] = - safe_mulu64(100ULL, fw_tegra_stats->total_utilization[1]) / - duration; - kmd_tegra_stats->window_start_time = fw_tegra_stats->window_start_time; - kmd_tegra_stats->window_end_time = fw_tegra_stats->window_end_time; - - err = PVA_SUCCESS; + kmd_tegra_stats->average_vpu_utilization[0] = calc_vpu_utilization( + fw_tegra_stats.total_utilization[0], duration); + kmd_tegra_stats->average_vpu_utilization[1] = calc_vpu_utilization( + fw_tegra_stats.total_utilization[1], duration); + kmd_tegra_stats->window_start_time = fw_tegra_stats.window_start_time; + kmd_tegra_stats->window_end_time = fw_tegra_stats.window_end_time; + return PVA_SUCCESS; cancel_builder: pva_kmd_cmdbuf_builder_cancel(&builder); -err_out: +dev_idle: pva_kmd_device_idle(pva); +err_out: return err; } diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c index 9365fdf7..8a0cf2bf 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c +++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_userspace_misc.c @@ -48,7 +48,8 @@ void pva_kmd_free(void *ptr) void pva_kmd_fault(void) { - abort(); + pva_kmd_log_err("PVA KMD fault"); + exit(1); } void pva_kmd_sema_init(pva_kmd_sema_t *sem, uint32_t val) diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c index 8b919517..ee4438f2 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c +++ 
b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.c
@@ -40,6 +40,7 @@ enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena)
if (default_path_len > 0U) {
(void)memcpy(pva_auth->pva_auth_allowlist_path, default_path,
default_path_len);
+ pva_auth->pva_auth_allowlist_path[default_path_len] = '\0';
}
return PVA_SUCCESS;
@@ -259,7 +260,8 @@ fail:
}
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
- uint8_t *dataptr, size_t size)
+ const uint8_t *dataptr,
+ size_t size)
{
enum pva_error err = PVA_SUCCESS;
struct pva_vpu_auth *pva_auth;
@@ -271,15 +273,15 @@ enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
pva_kmd_mutex_lock(&pva_auth->allow_list_lock);
if (pva_auth->pva_auth_enable) {
- pva_dbg_printf("App authentication enabled");
+ pva_dbg_printf("App authentication enabled\n");
if (pva_auth->pva_auth_allow_list_parsed == false) {
err = pva_kmd_allowlist_parse(pva);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
- "App authentication allowlist parsing successfull");
+ "App authentication allowlist parsing successful\n");
} else {
pva_dbg_printf(
- "App authentication allowlist parsing failed");
+ "App authentication allowlist parsing failed\n");
}
}
@@ -289,14 +291,15 @@ enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
size);
if (err == PVA_SUCCESS) {
pva_dbg_printf(
- "App authentication successfull");
+ "App authentication successful\n");
} else {
- pva_dbg_printf("App authentication failed : %d",
- err);
+ pva_dbg_printf(
+ "App authentication failed: %d\n",
+ err);
}
}
} else {
- pva_dbg_printf("App authentication disabled");
+ pva_dbg_printf("App authentication disabled\n");
}
pva_kmd_mutex_unlock(&pva_auth->allow_list_lock);
@@ -328,9 +331,12 @@ enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva)
//Destroy previously parsed allowlist data
pva_kmd_allowlist_destroy(pva_auth);
+ pva_dbg_printf("Allowlist path: %s\n",
+ pva_auth->pva_auth_allowlist_path);
err = pva_kmd_auth_allowlist_load(
pva, pva_auth->pva_auth_allowlist_path, &data, &size);
if (err != PVA_SUCCESS) {
+ pva_kmd_log_err("Failed to load allowlist");
if (data != NULL) {
pva_kmd_free(data);
}
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h
index a590b5fc..19e9a7b8 100644
--- a/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h
+++ b/drivers/video/tegra/host/pva/src/kmd/common/pva_kmd_vpu_app_auth.h
@@ -73,7 +73,8 @@ enum pva_error pva_kmd_init_vpu_app_auth(struct pva_kmd_device *pva, bool ena);
void pva_kmd_deinit_vpu_app_auth(struct pva_kmd_device *pva);
enum pva_error pva_kmd_verify_exectuable_hash(struct pva_kmd_device *pva,
- uint8_t *dataptr, size_t size);
+ const uint8_t *dataptr,
+ size_t size);
enum pva_error pva_kmd_allowlist_parse(struct pva_kmd_device *pva);
diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h
new file mode 100644
index 00000000..dd64532c
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_trace_event.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ +#ifndef PVA_KMD_SHIM_TRACE_EVENT_H +#define PVA_KMD_SHIM_TRACE_EVENT_H + +#include "pva_kmd_device.h" + +void pva_kmd_shim_add_trace_vpu_exec( + struct pva_kmd_device *pva, + struct pva_kmd_fw_msg_vpu_trace const *trace_info); + +#endif // PVA_KMD_SHIM_TRACE_EVENT_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h index bc72c6c2..9d062672 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_utils.h @@ -61,4 +61,6 @@ static inline uint32_t array_index_nospec(uint32_t index, uint32_t size) } #endif +uint64_t pva_kmd_get_time_tsc(void); + #endif // PVA_KMD_SHIM_UTILS_H diff --git a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h index 55440175..e666f91d 100644 --- a/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h +++ b/drivers/video/tegra/host/pva/src/kmd/common/shim/pva_kmd_shim_vpu_app_auth.h @@ -11,5 +11,6 @@ enum pva_error pva_kmd_auth_allowlist_load(struct pva_kmd_device *pva, const char *file_name, uint8_t **hash_keys_data, uint64_t *psize); - +void pva_kmd_update_allowlist_path(struct pva_kmd_device *pva, + const char *allowlist_path); #endif \ No newline at end of file diff --git a/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h index 5ec593b0..d8d73cfc 100644 --- a/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h +++ b/drivers/video/tegra/host/pva/src/kmd/include/pva_kmd.h @@ -8,29 +8,40 @@ #include "pva_constants.h" #include "pva_math_utils.h" +#define PVA_OPS_PRIVATE_OPCODE_FLAG (1U << 31U) + /* KMD API: context init */ -struct pva_kmd_context_init_in_args { +struct pva_ops_context_init { +#define PVA_OPS_OPCODE_CONTEXT_INIT (1U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; uint32_t resource_table_capacity; + uint32_t pad; }; -struct pva_kmd_context_init_out_args { +struct pva_ops_response_context_init { enum pva_error error; uint64_t ccq_shm_hdl; }; -struct pva_kmd_syncpt_register_out_args { +struct pva_ops_syncpt_register { +#define PVA_OPS_OPCODE_SYNCPT_REGISTER (2U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; +}; + +struct pva_ops_response_syncpt_register { enum pva_error error; uint32_t syncpt_ro_res_id; uint32_t syncpt_rw_res_id; uint32_t synpt_size; uint32_t synpt_ids[PVA_NUM_RW_SYNCPTS_PER_CONTEXT]; uint32_t num_ro_syncpoints; + uint32_t pad; }; /** * Calculates the total memory size required for a PVA submission queue. * This includes the size of the queue header and the combined size of all command buffer submission info structures. - * + * * @param x The number of command buffer submission info structures. * @return The total memory size in bytes. 
*/ @@ -45,132 +56,110 @@ static inline uint32_t pva_get_submission_queue_memory_size(uint32_t x) } /* KMD API: queue create */ -struct pva_kmd_queue_create_in_args { +struct pva_ops_queue_create { +#define PVA_OPS_OPCODE_QUEUE_CREATE (3U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; uint32_t max_submission_count; uint64_t queue_memory_handle; uint64_t queue_memory_offset; }; -struct pva_kmd_queue_create_out_args { +struct pva_ops_response_queue_create { enum pva_error error; uint32_t queue_id; uint32_t syncpt_fence_counter; }; /* KMD API: queue destroy */ -struct pva_kmd_queue_destroy_in_args { +struct pva_ops_queue_destroy { +#define PVA_OPS_OPCODE_QUEUE_DESTROY (4U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; uint32_t queue_id; + uint32_t pad; }; -struct pva_kmd_queue_destroy_out_args { +struct pva_ops_response_queue_destroy { enum pva_error error; + uint32_t pad; }; -struct pva_kmd_memory_register_in_args { - enum pva_memory_segment segment; - uint32_t access_flags; - uint64_t memory_handle; - uint64_t offset; - uint64_t size; -}; - -/* KMD API: executable */ -struct pva_kmd_executable_register_in_args { - uint32_t size; -}; - -struct pva_kmd_executable_get_symbols_in_args { +struct pva_ops_executable_get_symbols { +#define PVA_OPS_OPCODE_EXECUTABLE_GET_SYMBOLS (5U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; uint32_t exec_resource_id; + uint32_t pad; }; -struct pva_kmd_executable_get_symbols_out_args { +struct pva_ops_response_executable_get_symbols { enum pva_error error; uint32_t num_symbols; /* Followed by of struct pva_symbol_info */ }; -/* KMD API: DMA config */ -struct pva_kmd_dma_config_register_in_args { - struct pva_dma_config_header dma_config_header; - /* Followed by hwseq words, channels, descriptors, etc. */ -}; - -struct pva_kmd_register_out_args { - enum pva_error error; - uint32_t resource_id; -}; - -struct pva_kmd_exec_register_out_args { - enum pva_error error; - uint32_t resource_id; - uint32_t num_symbols; -}; - -struct pva_kmd_unregister_in_args { - uint32_t resource_id; -}; - -enum pva_kmd_op_type { - PVA_KMD_OP_CONTEXT_INIT, - PVA_KMD_OP_QUEUE_CREATE, - PVA_KMD_OP_QUEUE_DESTROY, - PVA_KMD_OP_EXECUTABLE_GET_SYMBOLS, - PVA_KMD_OP_MEMORY_REGISTER, - PVA_KMD_OP_SYNPT_REGISTER, - PVA_KMD_OP_EXECUTABLE_REGISTER, - PVA_KMD_OP_DMA_CONFIG_REGISTER, - PVA_KMD_OP_UNREGISTER, - PVA_KMD_OP_MAX, +/** + * @brief Structure for memory registration operation. + */ +struct pva_ops_memory_register { +#define PVA_OPS_OPCODE_MEMORY_REGISTER (6U | PVA_OPS_PRIVATE_OPCODE_FLAG) + struct pva_ops_header header; /**< Operation header */ + enum pva_memory_segment segment; /**< Memory segment to register */ + uint32_t access_flags; /**< Memory access flags */ + uint64_t import_id; /**< Import ID of the memory */ + uint64_t offset; /**< Offset into the memory */ + uint64_t size; /**< Size of memory to register */ }; /** - * The header of a KMD operation + * @brief Response structure for memory registration operation. */ -struct pva_kmd_op_header { - enum pva_kmd_op_type op_type; /**< Type of the KMD operation */ +struct pva_ops_response_register { + enum pva_error error; /**< Operation result status */ + uint32_t resource_id; /**< Assigned resource ID */ }; /** - * The header of a KMD response + * @brief Structure for resource unregistration operation. */ -struct pva_kmd_response_header { - uint32_t rep_size; /** Size of the response, including the header */ -}; - -enum pva_kmd_ops_mode { - /** - * Only one operation is allowed. 
The
- * operation will be done synchronously.
- * KMD will wait for the fence if
- * necessary. */
- PVA_KMD_OPS_MODE_SYNC,
- /**
- * A list of registration operations are allowed. These operations will
- * trigger a post fence. KMD will not wait for the fence.
- */
- PVA_KMD_OPS_MODE_ASYNC,
+struct pva_ops_unregister {
+#define PVA_OPS_OPCODE_UNREGISTER (7U | PVA_OPS_PRIVATE_OPCODE_FLAG)
+ struct pva_ops_header header; /**< Operation header */
+ uint32_t resource_id; /**< ID of resource to unregister */
+ uint32_t pad; /**< Padding for 8-byte alignment */
};
/**
- * A buffer contains a list of KMD operations and a post fence.
- *
- * In general, the list of KMD operations contain jobs that need to be done by
- * the KMD and FW. KMD will first perform its part and then submit a privileged
- * command buffer to FW. FW will trigger the provided post fence when done.
- *
- * NOTE: Starting address of every struct/array in the buffer must be aligned to
- * 8 bytes.
+ * @brief Response structure for executable registration operation.
*/
-struct pva_kmd_operations {
- enum pva_kmd_ops_mode mode;
- struct pva_fw_postfence postfence;
- /** Followed by a list of KMD operation(s) */
+struct pva_ops_response_executable_register {
+ enum pva_error error; /**< Operation result status */
+ uint32_t resource_id; /**< Assigned resource ID */
+ uint32_t num_symbols; /**< Number of symbols in executable */
+ uint32_t pad; /**< Padding for 8-byte alignment */
};
-/* Max op buffer sizer is 8 MB */
-#define PVA_KMD_MAX_OP_BUFFER_SIZE (8 * 1024 * 1024)
+/**
+ * @brief Response structure for unregister operation.
+ */
+struct pva_ops_response_unregister {
+ enum pva_error error; /**< Operation result status */
+ uint32_t pad; /**< Padding for 8-byte alignment */
+};
-/* Max respone size is 8 KB */
-#define PVA_KMD_MAX_RESP_BUFFER_SIZE (8 * 1024)
+enum pva_ops_submit_mode {
+ PVA_OPS_SUBMIT_MODE_SYNC,
+ PVA_OPS_SUBMIT_MODE_ASYNC,
+};
+
+struct pva_dma_config {
+ struct pva_dma_config_header header;
+ const uint32_t *hwseq_words;
+ const struct pva_dma_channel *channels;
+ const struct pva_dma_descriptor *descriptors;
+ const struct pva_dma_static_binding *static_bindings;
+};
+
+#define PVA_OPS_CONTEXT_BUFFER_SIZE (1U * 1024U * 1024U) // 1 MB
+#define PVA_KMD_MAX_OP_BUFFER_SIZE (8 * 1024 * 1024) // 8 MB
+#define PVA_KMD_MAX_RESP_BUFFER_SIZE (8 * 1024) // 8 KB
#endif // PVA_KMD_H
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild b/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild
index 8372206f..453f98fc 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/Kbuild
@@ -1,13 +1,14 @@
-################################### tell Emacs this is a -*- makefile-gmake -*-
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: GPL-2.0-only
#
-# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
#
-# NVIDIA CORPORATION and its licensors retain all intellectual property
-# and proprietary rights in and to this software, related documentation
-# and any modifications thereto. Any use, reproduction, disclosure or
-# distribution of this software and related documentation without an express
-# license agreement from NVIDIA CORPORATION is strictly prohibited. 
-###############################################################################
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
obj-m := pva_kmd_linux.o
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h
index 9bf781ad..4738ca6f 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/include/pva_kmd_linux.h
@@ -24,8 +24,10 @@ struct nvpva_ioctl_part {
* The header of request to KMD
*/
struct pva_kmd_linux_ioctl_header {
+ enum pva_ops_submit_mode mode;
struct nvpva_ioctl_part request;
struct nvpva_ioctl_part response;
+ struct pva_fw_postfence postfence;
};
-#endif // PVA_KMD_LINUX_H
\ No newline at end of file
+#endif // PVA_KMD_LINUX_H
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c
index fcac942c..d284a2af 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_debugfs.c
@@ -102,6 +102,11 @@ static const struct file_operations pva_linux_debugfs_fops = {
.read = debugfs_node_read,
.write = debugfs_node_write,
.release = debugfs_node_release,
+ // TODO: maybe we should provide our own llseek implementation.
+ // The problem with default_llseek is that its default handling
+ // of SEEK_END may not work unless the file size is specified when
+ // the file is opened.
+ .llseek = default_llseek,
};
void pva_kmd_debugfs_create_bool(struct pva_kmd_device *pva, const char *name,
@@ -147,8 +152,8 @@ void pva_kmd_debugfs_remove_nodes(struct pva_kmd_device *pva)
struct nvpva_device_data *props = device_data->pva_device_properties;
struct dentry *de = props->debugfs;
- debugfs_lookup_and_remove("stats_enable", de);
+ debugfs_lookup_and_remove("stats_enabled", de);
debugfs_lookup_and_remove("vpu_debug", de);
- debugfs_lookup_and_remove("profile_level", de);
+ debugfs_lookup_and_remove("profiling_level", de);
debugfs_lookup_and_remove("vpu_stats", de);
}
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c
index e2cf82e2..e4b3348f 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.c
@@ -235,8 +235,7 @@ void pva_kmd_power_off(struct pva_kmd_device *pva)
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
- pm_runtime_mark_last_busy(&props->pdev->dev);
- pm_runtime_put(&props->pdev->dev);
+ // Assert the reset line before cutting off power
/* Power management operation is asynchronous. We don't control when PVA
* will really be powered down. 
However, we need to free memories after
@@ -245,6 +244,9 @@ void pva_kmd_power_off(struct pva_kmd_device *pva)
reset_control_acquire(props->reset_control);
reset_control_assert(props->reset_control);
reset_control_release(props->reset_control);
+
+ pm_runtime_mark_last_busy(&props->pdev->dev);
+ pm_runtime_put(&props->pdev->dev);
}
void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva)
@@ -253,8 +255,8 @@ void pva_kmd_fw_reset_assert(struct pva_kmd_device *pva)
pva_kmd_linux_device_get_data(pva);
struct nvpva_device_data *props = device_data->pva_device_properties;
- /* FW Reset recovery operation is asynchronous.
- * we need to free memories after this call.
+ /* FW Reset recovery operation is asynchronous.
+ * We need to free memories after this call.
* Therefore, we assert the reset line to stop PVA from any
* further activity. */
reset_control_acquire(props->reset_control);
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h
index d87bd355..c4a2c50b 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device.h
@@ -12,7 +12,6 @@
#include "pva_kmd_device.h"
#include "pva_kmd_linux_isr.h"
-#define NV_PVA0_CLASS_ID 0xF1
#define PVA_KMD_LINUX_MAX_IORESOURCE_MEM 5
extern const struct file_operations tegra_pva_ctrl_ops;
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c
index 75c0c1c9..c2faea27 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_device_memory.c
@@ -86,7 +86,6 @@ pva_kmd_device_memory_acquire(uint64_t memory_handle, uint64_t offset,
mem_impl->dmabuf = dma_buf;
mem_impl->dev_mem.size = size;
mem_impl->offset = offset;
- close_fd(memory_handle);
return &mem_impl->dev_mem;
put_dmabuf:
@@ -113,6 +112,7 @@ void pva_kmd_device_memory_free(struct pva_kmd_device_memory *mem)
}
dma_buf_put(mem_impl->dmabuf);
+ mem_impl->dmabuf = NULL;
} else {
/* This memory comes from dma_alloc_coherent */
dev = get_context_device(mem_impl->dev_mem.pva,
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c
index 14132907..b58c74c6 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_driver.c
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#if KERNEL_VERSION(5, 14, 0) > LINUX_VERSION_CODE
#include
@@ -189,6 +190,72 @@ static ssize_t clk_cap_show(struct kobject *kobj, struct kobj_attribute *attr,
return snprintf(buf, PAGE_SIZE, "%ld\n", max_rate);
}
+static enum pva_error pva_kmd_get_co_info(struct platform_device *pdev)
+{
+ struct device_node *np;
+ const char *status = NULL;
+ uint32_t reg[4] = { 0 };
+ enum pva_error err = PVA_SUCCESS;
+ struct nvpva_device_data *pva_props = platform_get_drvdata(pdev);
+ struct pva_kmd_device *pva = pva_props->private_data;
+
+ np = of_find_compatible_node(NULL, NULL, "nvidia,pva-carveout");
+ if (np == NULL) {
+ dev_err(&pdev->dev, "find node failed\n");
+ goto err_out;
+ }
+
+ /* of_find_compatible_node() took a reference on np;
+ * drop it on every exit path below.
+ */
+ if (of_property_read_string(np, "status", &status)) {
+ dev_err(&pdev->dev, "read status failed\n");
+ goto put_node;
+ }
+
+ if (strcmp(status, "okay")) {
+ dev_err(&pdev->dev, "status compare failed\n");
+ goto put_node; 
+ }
+
+ if (of_property_read_u32_array(np, "reg", reg, 4)) {
+ dev_err(&pdev->dev, "read_32_array failed\n");
+ goto put_node;
+ }
+
+ pva->fw_carveout.base_pa = ((u64)reg[0] << 32 | (u64)reg[1]);
+ pva->fw_carveout.size = ((u64)reg[2] << 32 | (u64)reg[3]);
+
+ /* Done with the DT node */
+ of_node_put(np);
+
+ if (iommu_get_domain_for_dev(&pdev->dev)) {
+ pva->fw_carveout.base_va =
+ dma_map_resource(&pdev->dev, pva->fw_carveout.base_pa,
+ pva->fw_carveout.size,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&pdev->dev, pva->fw_carveout.base_va)) {
+ dev_err(&pdev->dev, "Failed to pin fw_bin_mem CO\n");
+ goto err_out;
+ }
+ } else {
+ pva->fw_carveout.base_va = pva->fw_carveout.base_pa;
+ }
+
+ dev_info(&pdev->dev, "Allocated pva->fw_carveout\n");
+ return err;
+
+put_node:
+ of_node_put(np);
+err_out:
+ dev_err(&pdev->dev, "get co fail\n");
+ return PVA_INVAL;
+}
+
+static void pva_kmd_free_co_mem(struct platform_device *pdev)
+{
+ struct nvpva_device_data *pva_props = platform_get_drvdata(pdev);
+ struct pva_kmd_device *pva = pva_props->private_data;
+
+ if (iommu_get_domain_for_dev(&pdev->dev)) {
+ dma_unmap_resource(&pdev->dev, pva->fw_carveout.base_va,
+ pva->fw_carveout.size, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+}
+
static struct kobj_type nvpva_kobj_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
};
@@ -238,7 +305,8 @@ static int pva_probe(struct platform_device *pdev)
pva_device->is_hv_mode = is_tegra_hypervisor_mode();
- /*Force to always boot from file in case of L4T*/
+ /* On L4T, force booting from file. */
+ /* To load from GSC instead, remove the block below. */
if (!pva_device->is_hv_mode) {
load_from_gsc = false;
}
@@ -287,6 +355,14 @@ static int pva_probe(struct platform_device *pdev)
pva_kmd_debugfs_create_nodes(pva_device);
pva_kmd_linux_register_hwpm(pva_device);
+ if (!pva_device->is_hv_mode && pva_device->load_from_gsc) {
+ err = pva_kmd_get_co_info(pdev);
+ if (err != PVA_SUCCESS) {
+ dev_err(dev, "Failed to get CO info\n");
+ goto err_cdev_init;
+ }
+ }
+
if (pva_props->num_clks > 0) {
err = kobject_init_and_add(&pva_props->clk_cap_kobj,
&nvpva_kobj_ktype, &pdev->dev.kobj,
@@ -357,6 +433,10 @@ static int __exit pva_remove(struct platform_device *pdev)
kobject_put(&pva_props->clk_cap_kobj);
}
+ if (!pva_device->is_hv_mode && pva_device->load_from_gsc) {
+ pva_kmd_free_co_mem(pdev);
+ }
+
nvpva_device_release(pdev);
pva_kmd_debugfs_destroy_nodes(pva_device);
pva_kmd_linux_unregister_hwpm(pva_device);
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c
new file mode 100644
index 00000000..149cbabb
--- /dev/null
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_event_trace.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#include "pva_kmd_shim_trace_event.h"
+#include "pva_kmd_linux_device.h"
+#define CREATE_TRACE_POINTS
+#include "trace/events/nvpva_ftrace.h"
+#include
+
+void pva_kmd_shim_add_trace_vpu_exec(
+ struct pva_kmd_device *pva,
+ struct pva_kmd_fw_msg_vpu_trace const *trace_info)
+{
+ uint64_t vpu_start = trace_info->vpu_start_time;
+ uint64_t vpu_end = trace_info->vpu_end_time;
+
+ // Unlike in the PVA V2 stack, submissions do not go through KMD.
+ // So, the concept of a task being enqueued by KMD does not exist.
+ // We can request FW to record timestamps of when command buffers
+ // were submitted to it, but that would introduce a lot of overhead. 
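+ // The queue interval below is therefore collapsed to a zero-length
+ // span at the VPU start time.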
+ uint64_t queue_start = vpu_start;
+ uint64_t queue_end = vpu_start;
+
+ // In V2, each kernel launch is independent and has a distinct setup
+ // and teardown phase. In V3, several kernels may share a command buffer
+ // and it is difficult to distinctly determine the setup and teardown
+ // phase for each kernel.
+ // So, we use the vpu_start time as the prepare_start and prepare_end time.
+ uint64_t prepare_start = vpu_start;
+ uint64_t prepare_end = vpu_start;
+
+ // In V2, each kernel launch has a distinct postfence.
+ // In V3, several kernel launches may share a command buffer and therefore
+ // the same postfence. Using this postfence time for all kernel launches
+ // may be confusing for the user. So, we use vpu_end time instead.
+ uint64_t post_start = vpu_end;
+ uint64_t post_end = vpu_end;
+
+ // In V2, Job ID is a 32-bit value with the top 8 bits being the queue ID
+ // and the bottom 24 bits being a per-task counter. In V3, we only use the
+ // queue ID.
+ uint32_t job_id = (trace_info->queue_id & 0x000000FF) << 24;
+
+ trace_pva_job_ext_event(job_id, trace_info->ccq_id,
+ 0, // syncpt_thresh,
+ trace_info->engine_id, queue_start, queue_end,
+ prepare_start, prepare_end, vpu_start, vpu_end,
+ post_start, post_end);
+
+ trace_job_submit(NULL, pva_kmd_get_device_class_id(pva), job_id,
+ trace_info->num_prefences, trace_info->prog_id,
+ trace_info->submit_id, vpu_start);
+}
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c
index fe389ae3..22e4a450 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_ioctl.c
@@ -82,9 +82,10 @@ static long pva_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
goto unlock;
}
- op_err = pva_kmd_ops_handler(ocb->kmd_ctx, ocb->req_buffer,
- hdr->request.size, ocb->resp_buffer,
- hdr->response.size, &resp_size);
+ op_err = pva_kmd_ops_handler(ocb->kmd_ctx, hdr->mode, &hdr->postfence,
+ ocb->req_buffer, hdr->request.size,
+ ocb->resp_buffer, hdr->response.size,
+ &resp_size);
if (op_err != PVA_SUCCESS) {
if (op_err == PVA_NO_RESOURCE_ID || op_err == PVA_NOMEM) {
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c
index 4724bd93..8117a83e 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_misc.c
@@ -4,6 +4,7 @@
#include
#include
#include
+#include
#include "pva_kmd_mutex.h"
#include "pva_kmd_thread_sema.h"
#include "pva_kmd_utils.h"
@@ -86,3 +87,15 @@ void pva_kmd_sema_post(pva_kmd_sema_t *sem)
{
up(sem);
}
+
+uint64_t pva_kmd_get_time_tsc(void)
+{
+ uint64_t timestamp;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)
+ timestamp = arch_timer_read_counter();
+#else
+ timestamp = arch_counter_get_cntvct();
+#endif
+ return timestamp;
+}
diff --git a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c
index 483a64a7..16c77cfd 100644
--- a/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c
+++ b/drivers/video/tegra/host/pva/src/kmd/linux/pva_kmd_linux_vpu_app_auth.c
@@ -74,3 +74,10 @@ const char *pva_kmd_get_default_allowlist(void)
{
return PVA_AUTH_ALLOW_LIST_DEFAULT;
}
+
+void pva_kmd_update_allowlist_path(struct pva_kmd_device *pva,
+ const char *allowlist_path)
+{
+ // Stub definition
+ pva_dbg_printf("Allowlist path update ignored on Linux\n");
+}
diff --git a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
index ce557cfb..e9eaa58b 100644
--- a/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
+++ b/drivers/video/tegra/host/pva/src/libs/pva/include/pva_math_utils.h
@@ -193,6 +193,15 @@ static inline uint8_t safe_add_u8(uint8_t addend1, uint8_t addend2,
return (uint8_t)sum;
}
+static inline uint32_t align8_u32(uint32_t val, pva_math_error *err)
+{
+ if (val > (UINT32_MAX - 7u)) {
+ *err = MATH_OP_ERROR;
+ return 0u;
+ }
+ return (val + 7u) & ~(uint32_t)7u;
+}
+
#define safe_addu64(addend1, addend2) \
safe_add_u64((addend1), (addend2), __FILE__, __LINE__)
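A usage note, not part of the patch: align8_u32() follows the same caller-seeded error-flag convention as mulu64() above, i.e. the caller initializes a pva_math_error to MATH_OP_SUCCESS and inspects it after the call. A minimal sketch (the caller name pva_ops_padded_size is hypothetical) of rounding an ops-buffer payload size up to the 8-byte boundary the ops structs above are padded to:

static inline uint32_t pva_ops_padded_size(uint32_t payload_size)
{
	pva_math_error math_err = MATH_OP_SUCCESS;
	uint32_t aligned = align8_u32(payload_size, &math_err);

	if (math_err != MATH_OP_SUCCESS) {
		/* payload_size was within 7 bytes of UINT32_MAX; reject it */
		return 0u;
	}
	return aligned;
}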