Updating prebuilts and/or headers

2eba699906039d6615aae4967f6ea79bfe44a40a - event_sample_app/block_pool.c
f3abb0a884f0647204ad32ff51255c4712e52120 - event_sample_app/Makefile
9ee49033e077ac5c8bf458a04c91dd3dbed9633d - event_sample_app/event_loop.h
b33adce6eb1bbc7af23f6c37b6a635479e18a66a - event_sample_app/block_returnsync.c
a56041c06b6bc1d3812b72b399d7d78dd7895485 - event_sample_app/block_limiter.c
ca34c957759f7a010f0cbbbf9bedc03a2c98092b - event_sample_app/block_c2c.c
8d6d0ec3aa8e374a1d2a5fedc9dd24ff7bbdb731 - event_sample_app/block_multicast.c
a76149a2531899e35843d939f60ad8979d8cf65f - event_sample_app/block_consumer_uc1.c
9da8763e4af4b4b7278507a3ebfe2c68a7a24585 - event_sample_app/util.h
2bf7e1383d6e8913c9b0a6a8bdd48fe63d8098d0 - event_sample_app/block_producer_uc1.c
a54abf82eaa2d888e379ab4596ba68ce264e80b5 - event_sample_app/block_info.h
080a6efe263be076c7046e70e31098c2bbed0f6d - event_sample_app/block_presentsync.c
7dd10e5ea71f0c4a09bbe1f9f148f67a13ee098c - event_sample_app/util.c
bc1a6f9017b28e5707c166a658a35e6b3986fdf4 - event_sample_app/usecase1.h
317f43efc59638bf1eae8303f0c79eafb059241a - event_sample_app/block_ipc.c
40361c8f0b68f7d5207db2466ce08c19c0bf1c90 - event_sample_app/event_loop_service.c
efad113d0107e5d8f90146f3102a7c0ed22f1a35 - event_sample_app/event_loop_threads.c
2908615cebcf36330b9850c57e8745bf324867b2 - event_sample_app/block_queue.c
36ed68eca1a7800cf0d94e763c9fc352ee8cda1e - event_sample_app/block_common.c
675f75d61bd0226625a8eaaf0e503c9e976c8d61 - event_sample_app/main.c
c3b26619dd07d221e953fc5dc29a50dcb95a8b97 - rawstream/Makefile
1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c
1d96498fe3c922f143f7e50e0a32b099714060ad - rawstream/rawstream_consumer.c
d077dafc9176686f6d081026225325c2a303a60e - rawstream/rawstream_producer.c
54ae655edddda7dcabe22fbf0b27c3f617978851 - rawstream/rawstream.h
d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c
81e3d6f8ff5252797a7e9e170b74df6255f54f1b - rawstream/rawstream_main.c

Change-Id: I0f4e671693eb0addfe8d0e6532cc8f240cb6c778
This commit is contained in:
svcmobrel-release
2025-09-19 10:10:49 -07:00
parent e927f757fd
commit a3f1b7ea33
31 changed files with 9265 additions and 0 deletions

commitFile.txt

@@ -0,0 +1,29 @@
Updating prebuilts and/or headers
2eba699906039d6615aae4967f6ea79bfe44a40a - event_sample_app/block_pool.c
f3abb0a884f0647204ad32ff51255c4712e52120 - event_sample_app/Makefile
9ee49033e077ac5c8bf458a04c91dd3dbed9633d - event_sample_app/event_loop.h
b33adce6eb1bbc7af23f6c37b6a635479e18a66a - event_sample_app/block_returnsync.c
a56041c06b6bc1d3812b72b399d7d78dd7895485 - event_sample_app/block_limiter.c
ca34c957759f7a010f0cbbbf9bedc03a2c98092b - event_sample_app/block_c2c.c
8d6d0ec3aa8e374a1d2a5fedc9dd24ff7bbdb731 - event_sample_app/block_multicast.c
a76149a2531899e35843d939f60ad8979d8cf65f - event_sample_app/block_consumer_uc1.c
9da8763e4af4b4b7278507a3ebfe2c68a7a24585 - event_sample_app/util.h
2bf7e1383d6e8913c9b0a6a8bdd48fe63d8098d0 - event_sample_app/block_producer_uc1.c
a54abf82eaa2d888e379ab4596ba68ce264e80b5 - event_sample_app/block_info.h
080a6efe263be076c7046e70e31098c2bbed0f6d - event_sample_app/block_presentsync.c
7dd10e5ea71f0c4a09bbe1f9f148f67a13ee098c - event_sample_app/util.c
bc1a6f9017b28e5707c166a658a35e6b3986fdf4 - event_sample_app/usecase1.h
317f43efc59638bf1eae8303f0c79eafb059241a - event_sample_app/block_ipc.c
40361c8f0b68f7d5207db2466ce08c19c0bf1c90 - event_sample_app/event_loop_service.c
efad113d0107e5d8f90146f3102a7c0ed22f1a35 - event_sample_app/event_loop_threads.c
2908615cebcf36330b9850c57e8745bf324867b2 - event_sample_app/block_queue.c
36ed68eca1a7800cf0d94e763c9fc352ee8cda1e - event_sample_app/block_common.c
675f75d61bd0226625a8eaaf0e503c9e976c8d61 - event_sample_app/main.c
c3b26619dd07d221e953fc5dc29a50dcb95a8b97 - rawstream/Makefile
1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c
1d96498fe3c922f143f7e50e0a32b099714060ad - rawstream/rawstream_consumer.c
d077dafc9176686f6d081026225325c2a303a60e - rawstream/rawstream_producer.c
54ae655edddda7dcabe22fbf0b27c3f617978851 - rawstream/rawstream.h
d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c
81e3d6f8ff5252797a7e9e170b74df6255f54f1b - rawstream/rawstream_main.c


@@ -0,0 +1,258 @@
SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: LicenseRef-NvidiaProprietary
NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
property and proprietary rights in and to this material, related
documentation and any modifications thereto. Any use, reproduction,
disclosure or distribution of this material and related documentation
without an express license agreement from NVIDIA CORPORATION or
its affiliates is strictly prohibited.
NvSciStream Event Loop Driven Sample App - README
---
# nvscistream_event_sample - NvSciStream Sample App
## Description
This directory contains an NvSciStream sample application that
supports a variety of use cases, using an event-loop driven model.
Once the stream is fully connected, all further setup and streaming
operations are triggered by events, processed either by a single
NvSciEvent-driven thread or separate threads which wait for events
on each block. The former is the preferred approach for implementing
NvSciStream applications. In addition to those events which NvSci
itself generates, any other event which can be bound to an NvSciEvent
can be added to the event loop. This allows for robust applications
which can handle events regardless of the order in which they occur.
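For illustration, the single-threaded dispatch model can be reduced to the
following sketch. It assumes the `blocks`/`numAlive` bookkeeping and the
per-block handler convention from block_info.h (handlers return a negative
value on fatal error, 0 when no event is pending, 1 on success, and 2 on the
final/disconnect event). The real loop in event_loop_service.c waits on
NvSciEvent notifiers rather than polling as shown here:

```c
/* Minimal polling sketch of the single-thread dispatch model.
 * The production code in event_loop_service.c instead blocks on
 * NvSciEvent notifiers; error handling is elided here. */
static void dispatchLoopSketch(void)
{
    while (numAlive > 0U) {
        for (int32_t b = 0; b < numBlocks; ++b) {
            BlockEventData* entry = &blocks[b];
            if (!entry->isAlive) {
                continue;
            }
            /* Ask the block's handler to process one event without waiting */
            int32_t rv = entry->func(entry->data, 0U);
            /* Handlers free their own data on terminal events, so the
             * loop only updates its liveness bookkeeping. */
            if ((rv < 0) || (rv > 1)) {
                entry->isAlive = false;
                numAlive--;
            }
        }
    }
}
```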
To use this sample for writing your own applications:
* See main.c for examples of how to do top level application setup and
how to select the blocks needed for your use case and connect them
all together.
* See the descriptions in the usecase*.h files to determine which use cases
involve the producer and consumer engines that you are interested in.
* See the appropriate block_*.c files for examples of creating the
necessary blocks and handling the events that they encounter.
See the block_producer_*.c and block_consumer_*.c files for examples of how
to map the relevant engines to and from NvSci.
* See the appropriate event_loop_*.c file for your chosen event handling
method.
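As a point of reference, the top-level wiring in main.c boils down to a
pattern like the following simplified sketch for the default single-process,
single-consumer case. It uses the helper constructors declared in
block_info.h; the packet and frame counts are illustrative, and return-code
checks are omitted:

```c
/* Simplified single-process wiring sketch (illustrative, not the
 * complete main.c logic). */
NvSciStreamBlock producer, consumer, pool, queue;
createPool_Common(&pool, 3, false);       /* 3 packets, not a C2C pool   */
createQueue(&queue, 0);                   /* 0 = FIFO, nonzero = mailbox */
createProducer(&producer, pool, 100);     /* pool attached at creation   */
createConsumer(&consumer, queue, 0, 100); /* consumer index 0            */
/* With no multicast or IPC blocks in between, the producer connects
 * directly to the consumer; the pool and queue were already attached
 * when their owning blocks were created. */
NvSciStreamBlockConnect(producer, consumer);
```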
## Build the application
The NvSciStream sample includes source code and a Makefile.
Navigate to the sample application directory to build the application:
make clean
make
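The Makefile also accepts variable overrides on the command line; for
example (paths shown are illustrative):
make NVSCI_HEADER_DIR=/usr/include/nvsci_headers CUDA_PATH=/usr/local/cuda
make dbg=1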
## Examples of how to run the sample application:
* NOTE:
* Inter-process and inter-chip test cases must be run with sudo.
* NvMedia/CUDA stream (use case 2) of the sample application is not supported
on x86 and Jetson Linux devices.
* Inter-chip use cases are not supported on Jetson Linux devices.
* Update the NvIpc/PCIe endpoint accordingly.
Single-process, single-consumer CUDA/CUDA stream that uses the default event
service:
./nvscistream_event_sample
Single-process, single-consumer stream that uses the threaded event handling:
./nvscistream_event_sample -e t
Single-process NvMedia/CUDA stream with yuv format:
./nvscistream_event_sample -u 2 -s y
Single-process NvMedia/CUDA stream with three consumers, the second of which
uses the mailbox mode:
./nvscistream_event_sample -u 2 -m 3 -q 1 m
Multi-process CUDA/CUDA stream with three consumers, one in the same
process as the producer, and the other two in separate processes. The
first and the third consumers use the mailbox mode:
./nvscistream_event_sample -m 3 -p -c 0 -q 0 m &
./nvscistream_event_sample -c 1 -c 2 -q 2 m
Multi-process CUDA/CUDA stream with three consumers, one in the same
process as the producer, and the other two in separate processes.
To simulate a less trusted consumer, one of the consumer
processes is given a lower priority. A limiter block is used to restrict
this consumer to hold at most one packet. The total number of packets is
increased to five.
Linux example:
./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
./nvscistream_event_sample -c 1 &
nice -n 19 ./nvscistream_event_sample -c 2 &
# Makes the third process as nice as possible.
QNX example:
./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
./nvscistream_event_sample -c 1 &
nice -n 1 ./nvscistream_event_sample -c 2 &
# Reduces the priority level of the third process by 1.
Multi-process CUDA/CUDA stream with two consumers, one in the same
process as the producer, and the other in a separate process. Both
processes enable the endpoint information option:
./nvscistream_event_sample -m 2 -p -c 0 -i &
./nvscistream_event_sample -c 1 -i
Multi-process CUDA/CUDA stream with extra validation steps for an ASIL-D process
(not supported on x86 or Jetson Linux devices):
./nvscistream_event_sample -u 3 -p &
./nvscistream_event_sample -u 3 -c 0
Multi-process CUDA/CUDA stream using an external event service to handle internal
I/O messages across the process boundary:
./nvscistream_event_sample -p -E &
./nvscistream_event_sample -c 0 -E
Multi-process CUDA/CUDA stream with one consumer on another SoC.
The consumer has the FIFO queue attached to the C2C IpcSrc block, and
a three-packet pool attached to the C2C IpcDst block. It uses IPC channel
nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for C2C communication.
./nvscistream_event_sample -P 0 nvscic2c_pcie_s0_c5_1 -Q 0 f
# Run the command below on the OS running on the peer SoC.
./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -F 0 3
Multi-process CUDA/CUDA stream with four consumers, one in the same
process as the producer, one in another process but in the same OS as the
producer, and two in another process on another OS running in a peer SoC.
The third and fourth consumers have a mailbox queue attached to the C2C
IpcSrc block, and a five-packet pool attached to the C2C IpcDst block.
The third consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
C2C communication. The 4th consumer uses nvscic2c_pcie_s0_c5_2 <->
nvscic2c_pcie_s0_c6_2 for C2C communication.
./nvscistream_event_sample -m 4 -c 0 -q 0 m -Q 2 m -Q 3 m -P 2 nvscic2c_pcie_s0_c5_1 -P 3 nvscic2c_pcie_s0_c5_2 &
./nvscistream_event_sample -c 1 -q 1 m
# Run the command below on the OS running on the peer SoC.
./nvscistream_event_sample -C 2 nvscic2c_pcie_s0_c6_1 -q 2 f -F 2 5 -C 3 nvscic2c_pcie_s0_c6_2 -q 3 m -F 3 5
# Example commands for the inter-process late-attach use case
Multi-process CUDA/CUDA stream with one early consumer and one late-attached consumer.
The producer and early consumer processes are configured to stream 100000 frames, whereas
the late-attached consumer process is configured to receive 10000 frames.
# Run the below commands to launch producer and early consumer processes.
./nvscistream_event_sample -m 2 -r 1 -p &
./nvscistream_event_sample -c 0 -k 0 100000 &
# Run the below command after some delay to launch the late-attached consumer process.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -c 1 -k 1 10000 &
Multi-process CUDA/CUDA stream with one early consumer and two late-attached consumers.
The producer and early consumer processes are configured to stream 100000 frames, whereas
late-attached consumer process one is configured to receive 10000 frames and
late-attached consumer process two is configured to receive 50000 frames.
# Run the below commands to launch producer and early consumer processes.
./nvscistream_event_sample -m 3 -r 2 -p &
./nvscistream_event_sample -c 0 -k 0 100000 &
# Run the below command after some delay to launch late-attached consumer process one.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -c 1 -k 1 10000 &
# Run the below command after some delay to launch late-attached consumer process two.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -c 2 -k 2 50000 &
# Example commands for the inter-process re-attach use case
Multi-process CUDA/CUDA stream with one early consumer and two late-attached consumers.
The producer and early consumer processes are configured to stream 100000 frames, whereas
late-attached consumer process one is configured to receive 10000 frames and
late-attached consumer process two is configured to receive 50000 frames.
Once late-attached consumer process one completes streaming, re-attach it to receive
5000 frames.
# Run the below commands to launch producer and early consumer processes.
./nvscistream_event_sample -m 3 -r 2 -p &
./nvscistream_event_sample -c 0 -k 0 100000 &
# Run the below command after some delay to launch late-attached consumer process one.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -c 1 -k 1 10000 &
# Run the below command after some delay to launch the late-attached consumer process two.
sleep 1;
./nvscistream_event_sample -L -c 2 -k 2 50000 &
# After late-attached consumer process one completes, re-attach it.
./nvscistream_event_sample -L -c 1 -k 1 5000 &
Limitations with C2C late/re-attach:
This sample app has the following limitations:
1. For C2C late/re-attach, the sample app does not support an IPC consumer being the only
early consumer with all remaining consumers late-attached over C2C, because the logic for
setting static attributes for late attach has not been added.
2. A C2C consumer can act as an IPC consumer during late/re-attach, but an IPC consumer
cannot be made a C2C consumer during late/re-attach.
# Example commands for the inter-chip late-attach use case
Multi-process CUDA/CUDA stream with one early C2C consumer and one C2C late-attached consumer.
The producer and early C2C consumer processes are configured to stream 100000 frames, whereas
the late-attached C2C consumer process is configured to receive 10000 frames.
The early consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
C2C communication. The late-attached consumer uses nvscic2c_pcie_s0_c5_2 <->
nvscic2c_pcie_s0_c6_2 for C2C communication.
# Run the below commands to launch producer on SOC1
./nvscistream_event_sample -m 2 -r 1 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 &
# Run the below commands to launch early consumer process on SOC2
./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
# Run the below command after some delay to launch the late-attached consumer process on SOC2
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
Multi-process CUDA/CUDA stream with one early C2C consumer and two C2C late-attached consumers.
The producer and early C2C consumer processes are configured to stream 100000 frames, whereas
late-attached C2C consumer process one is configured to receive 10000 frames and
late-attached C2C consumer process two is configured to receive 10000 frames.
The early consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
C2C communication. The late-attached consumer one uses nvscic2c_pcie_s0_c5_2 <->
nvscic2c_pcie_s0_c6_2 for C2C communication and the late-attached consumer two
uses nvscic2c_pcie_s0_c5_3 <-> nvscic2c_pcie_s0_c6_3 for C2C communication.
# Run the below commands to launch producer on SOC1
./nvscistream_event_sample -m 3 -r 2 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 -P 2 nvscic2c_pcie_s0_c5_3 &
# Run the below commands to launch early consumer process on SOC2
./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
# Run the below command after some delay to launch late-attached consumer process one.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
# Run the below command after some delay to launch late-attached consumer process two.
sleep 1;
./nvscistream_event_sample -L -C 2 nvscic2c_pcie_s0_c6_3 -k 2 10000 &
# Example commands for the inter-chip/inter-process re-attach use case
Multi-process CUDA/CUDA stream with one early consumer and two late-attached consumers.
The producer and early consumer processes are configured to stream 100000 frames, whereas
late-attached consumer process one is configured to receive 10000 frames and
late-attached consumer process two is configured to receive 50000 frames.
Once late-attached consumer process one completes streaming, re-attach it to receive
5000 frames.
Once late-attached consumer process two completes streaming, re-attach it as an IPC
consumer to receive 5000 frames.
# Run the below commands to launch producer on SOC1
./nvscistream_event_sample -m 3 -r 2 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 -P 2 nvscic2c_pcie_s0_c5_3 &
# Run the below commands to launch early consumer process on SOC2
./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
# Run the below command after some delay to launch late-attached consumer process one.
sleep 1; # This 1s delay lets the producer and consumer enter the streaming phase.
./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
# Run the below command after some delay to launch late-attached consumer process two.
sleep 1;
./nvscistream_event_sample -L -C 2 nvscic2c_pcie_s0_c6_3 -k 2 50000 &
# Once late-attached consumer process one completes streaming,
# re-attach it for receiving 5000 frames.
./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 5000 &
# Once late-attached consumer process two completes streaming,
# re-attach it as an IPC consumer on SOC1 to receive 5000 frames.
./nvscistream_event_sample -L -c 2 -k 2 5000 &

event_sample_app/Makefile

@@ -0,0 +1,146 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Location of common libraries
LIB_DIR = /usr/lib/aarch64-linux-gnu
# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
# Location of NVSCI header
NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
NVCCFLAGS :=
CCFLAGS :=
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
# Debug build flags
dbg = 0
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
################################################################################
# Common includes and paths
INCLUDES := -I./
LIBRARIES := -L$(LIB_DIR)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda -lrt
endif
# Includes and paths for NVSCI libraries
NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null)
NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null)
NVSCISTREAMHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscistream.h -print 2>/dev/null)
NVSCIEVENTHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscievent.h -print 2>/dev/null)
NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null)
ifeq ("$(NVSCIBUFHEADER)","")
$(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISYNCHEADER)","")
$(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISTREAMHEADER)","")
$(error ERROR - nvscistream.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIEVENTHEADER)","")
$(error ERROR - nvscievent.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIIPCHEADER)","")
$(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR))
endif
INCLUDES += -I$(NVSCI_HEADER_DIR)
LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream
ALL_CCFLAGS += --std=c++11 --threads 0
################################################################################
# Target rules
OUTPUT := nvscistream_event_sample
all: build
build: $(OUTPUT)
OBJ := main.o
OBJ += block_common.o
OBJ += block_c2c.o
OBJ += block_consumer_uc1.o
OBJ += block_ipc.o
OBJ += block_limiter.o
OBJ += block_multicast.o
OBJ += block_pool.o
OBJ += block_presentsync.o
OBJ += block_producer_uc1.o
OBJ += block_queue.o
OBJ += block_returnsync.o
OBJ += event_loop_service.o
OBJ += event_loop_threads.o
OBJ += util.o
%.o: %.c
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) -o $@ -c $<
$(OUTPUT): $(OBJ)
$(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES)
run: build
$(OUTPUT)
testrun: build
clean:
rm -f $(OBJ) $(OUTPUT)
clobber: clean

event_sample_app/block_c2c.c

@@ -0,0 +1,170 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - C2C block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvsciipc.h"
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Create and register a new C2C src block */
int32_t createC2cSrc(
NvSciStreamBlock* c2cSrc,
const char* channel,
NvSciStreamBlock queue)
{
NvSciError err;
uint32_t i;
uint32_t slot = 0;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("C2cSrc", 0);
if (NULL == blockData) {
return 0;
}
for (i=0; i< MAX_CONSUMERS; i++) {
if (!strcmp(ipcEP[i].c2cChannel, channel)) {
slot = i;
break;
}
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEP[slot].c2cEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for C2C src\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEP[slot].c2cEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a C2C src block */
err = NvSciStreamIpcSrcCreate2(ipcEP[slot].c2cEndpoint,
sciSyncModule,
sciBufModule,
queue,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create C2C src block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*c2cSrc = blockData->block;
ipcEP[slot].ipcBlock = blockData->block;
ipcEP[slot].c2cOpened = true;
ipcEP[slot].c2cConnected = true;
return 1;
}
/* Create and register a new C2C src block using an already-opened endpoint */
int32_t createC2cSrc2(
NvSciStreamBlock* c2cSrc,
NvSciIpcEndpoint endpoint,
NvSciStreamBlock queue)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("C2cSrc", 0);
if (NULL == blockData) {
return 0;
}
/* Create a C2C src block */
err = NvSciStreamIpcSrcCreate2(endpoint,
sciSyncModule,
sciBufModule,
queue,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create C2C src block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*c2cSrc = blockData->block;
return 1;
}
/* Create and register a new C2C dst block */
int32_t createC2cDst(
NvSciStreamBlock* c2cDst,
const char* channel,
NvSciStreamBlock pool)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("C2cDst", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEP[0].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for C2C dst\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEP[0].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a C2C dst block */
err = NvSciStreamIpcDstCreate2(ipcEP[0].ipcEndpoint,
sciSyncModule,
sciBufModule,
pool,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create C2C dst block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*c2cDst = blockData->block;
return 1;
}

event_sample_app/block_common.c

@@ -0,0 +1,279 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - common block event handling
*
* Block types which do not require type-specific interactions make use of
* this common code.
*/
#include <stdlib.h>
#if (QNX == 1)
#include <sys/neutrino.h>
#endif
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Flag indicating whether the thread handling the
 * late consumer connections has been started
 */
static bool threadStarted = false;
/* Delete common block */
void deleteCommon(
void* data)
{
BlockData* blockData = (BlockData*)data;
/* Destroy block */
if (blockData->block != 0) {
(void)NvSciStreamBlockDelete(blockData->block);
}
/* Check if it is late/re-attach usecase */
if (opts.numLateConsumer > 0U) {
/* Close the endpoints used by the IpcSrc/C2CSrc
* blocks for next late-/re-attach consumer connection
*/
pthread_mutex_lock(&mutex);
if ((!strcmp(blockData->name, "IpcSrc")) ||
(!strcmp(blockData->name, "C2cSrc"))) {
for (uint32_t i=0; i< MAX_CONSUMERS; i++) {
if (ipcEP[i].ipcBlock == blockData->block) {
/* close the Ipc endpoint */
if (ipcEP[i].ipcEndpoint) {
#if (QNX == 1)
if (ipcEP[i].coid != 0) {
(void)ConnectDetach_r(ipcEP[i].coid);
ipcEP[i].coid = 0;
}
if (ipcEP[i].chid != 0) {
(void)ChannelDestroy_r(ipcEP[i].chid);
ipcEP[i].chid = 0;
}
#endif
if (NvSciError_Success !=
NvSciIpcCloseEndpointSafe(ipcEP[i].ipcEndpoint, false)) {
printf("Failed to close ipc endpoint\n");
}
sleep(2);
ipcEP[i].ipcEndpoint = 0U;
}
/* close the C2C endpoint */
if (ipcEP[i].c2cEndpoint) {
if (NvSciError_Success !=
NvSciIpcCloseEndpointSafe(ipcEP[i].c2cEndpoint, false)) {
printf("Failed to close ipc endpoint\n");
}
ipcEP[i].c2cEndpoint = 0U;
}
/* clear the information as it is needed
 * for the next late-/re-attach connection
 */
ipcEP[i].ipcBlock = 0U;
ipcEP[i].ipcConnected = false;
ipcEP[i].c2cConnected = false;
ipcEP[i].ipcOpened = false;
ipcEP[i].c2cOpened = false;
break;
}
}
/* Wakeup the thread to handle the next set of
* late-/re-attach consumer connections
*/
pthread_cond_signal(&cond);
}
pthread_mutex_unlock(&mutex);
}
/* Free data */
free(blockData);
}
/* Handle events on a common block
*
* Blocks that don't require interaction after connection usually just
* receive notification of connection and disconnection.
*/
int32_t handleCommon(
void* data,
uint32_t wait)
{
/* Cast to common data */
BlockData* blockData = (BlockData*)data;
/* Get time to wait */
int64_t waitTime = wait ? blockData->waitTime : 0;
/* Query/wait for an event on the block */
NvSciStreamEventType event;
NvSciError err;
err = NvSciStreamBlockEventQuery(blockData->block, waitTime, &event);
/* Handle errors */
if (NvSciError_Success != err) {
/* If not asked to wait, a timeout is not an error */
if (!waitTime && (NvSciError_Timeout == err)) {
return 0;
}
/* Otherwise, any error is considered fatal. A timeout probably
* indicates a failure to connect and complete setup in a timely
* fashion, so we specifically call out this case.
*/
if (NvSciError_Timeout == err) {
printf("%s timed out waiting for setup instructions\n",
blockData->name);
} else {
printf("%s event query failed with error %x\n",
blockData->name, err);
}
blockData->deleteFunc(blockData);
return -1;
}
/* If we received an event, handle it based on its type */
int32_t rv = 1;
NvSciError status;
switch (event) {
/*
* Any event we don't explicitly handle is a fatal error
*/
default:
printf("%s received unknown event %x\n",
blockData->name, event);
rv = -1;
break;
/*
* Error events should never occur with safety-certified drivers,
* and are provided only in non-safety builds for debugging
* purposes. Even then, they should only occur when something
* fundamental goes wrong, like the system running out of memory,
* or stack/heap corruption, or a bug in NvSci which should be
* reported to NVIDIA.
*/
case NvSciStreamEventType_Error:
err = NvSciStreamBlockErrorGet(blockData->block, &status);
if (NvSciError_Success != err) {
printf("%s Failed to query the error event code %x\n",
blockData->name, err);
rv = -1;
} else {
if ((opts.numLateConsumer > 0U) &&
(status == NvSciError_StreamNotConnected)) {
printf("[WARN] %s received error event: %x\n",
blockData->name, status);
rv = 2;
} else {
printf("%s received error event: %x\n",
blockData->name, status);
rv = -1;
}
}
break;
/*
* If told to disconnect, it means either the stream finished its
* business or some other block had a failure. We'll just do a
* clean up and return without an error.
*/
case NvSciStreamEventType_Disconnected:
rv = 2;
break;
/*
* The block doesn't have to do anything on connection, but now we may
* wait forever for any further events, so the timeout becomes infinite.
*/
case NvSciStreamEventType_Connected:
/* Query producer and consumer(s) endpoint info if needed */
blockData->waitTime = -1;
break;
/* All setup complete. Transition to runtime phase */
case NvSciStreamEventType_SetupComplete:
if (opts.numLateConsumer > 0U) {
/* Check if it is multicast block */
if (!strcmp(blockData->name, "Multicast")) {
/* Wakeup the thread to handle the next set
* of late-/re-attach consumer connections
*/
pthread_cond_signal(&cond);
if (!threadStarted) {
threadStarted = true;
/* Spawn a thread to handle the late attach connections */
int32_t status = pthread_create(&dispatchThread,
NULL,
handleLateConsumerThreadFunc,
NULL);
if (status != 0) {
printf("Failed to spawn thread to monitor late consumer connections\n");
/* Abort the process, as this thread is essential for
 * processing the late-/re-attach consumer connections.
 * Failure to create it makes the late/re-attach use case
 * unusable.
 */
abort();
}
}
}
}
break;
}
/* On failure or final event, clean up the block */
if ((rv < 0) || (1 < rv)) {
blockData->deleteFunc(blockData);
}
return rv;
}
/* Create and register a new common block */
BlockData* createCommon(
char const* name,
size_t size)
{
/* If no size specified, just use BlockData */
if (0 == size) {
size = sizeof(BlockData);
}
/* Create a data structure to track the block's status */
BlockData* commonData = (BlockData*)calloc(1, size);
if (NULL == commonData) {
printf("Failed to allocate data structure for %s\n", name);
return NULL;
}
/* Save the name for debugging purposes */
strcpy(commonData->name, name);
/* Wait time for initial connection event will be 60 seconds */
commonData->waitTime = 60 * 1000000;
/* Use the common delete function */
commonData->deleteFunc = deleteCommon;
return commonData;
}


File diff suppressed because it is too large

event_sample_app/block_info.h

@@ -0,0 +1,284 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - block abstraction
*/
#ifndef _BLOCK_INFO_H
#define _BLOCK_INFO_H 1
#include <stdatomic.h>
#include <pthread.h>
#include "nvscistream.h"
/* Maximum number of consumers */
#define MAX_CONSUMERS 4
/* Maximum number of blocks */
#define MAX_BLOCKS 100
/* Maximum number of packets supported */
#define MAX_PACKETS 32
/* Maximum number of elements supported */
#define MAX_ELEMS 8
/* Memory size of endpoint information */
#define INFO_SIZE 50
/* NvSci modules for all objects */
extern NvSciSyncModule sciSyncModule;
extern NvSciBufModule sciBufModule;
/* Flag used to terminate the thread that
* was spawned to handle the late/re-attached
* consumer connections upon stream disconnect
*/
extern atomic_int streamDone;
/* Number of registered blocks for streaming */
extern int32_t numBlocks;
/* Number of active blocks */
extern uint32_t numAlive;
/* variables for synchronization */
extern pthread_mutex_t mutex;
extern pthread_cond_t cond;
/* Thread for handling late/re-attached consumer
* connections
*/
extern pthread_t dispatchThread;
/* Common options for all blocks */
typedef struct {
/* Indicate whether the producer/consumer sets endpoint info */
bool endInfo;
/* Indicate whether the producer uses yuv format */
bool yuv;
/* Indicate whether the extern event service is used */
bool useExtEventService;
/* Indicates the number of late consumers for late/re-attach usecase */
uint32_t numLateConsumer;
/* Total number of consumers */
uint32_t numConsumer;
/* Indicates c2c usecase */
bool c2cMode;
/* Indicates consumer connection is late/re-attach */
bool lateAttach;
} CommonOptions;
extern CommonOptions opts;
/* Endpoint data structure for tracking the
* IPC/C2C channels
*/
typedef struct {
/* Holds the IPC endpoint corresponding to an IPC channel */
NvSciIpcEndpoint ipcEndpoint;
/* Holds the C2C endpoint corresponding to a C2C channel */
NvSciIpcEndpoint c2cEndpoint;
/* named IPC channel */
char ipcChannel[32];
/* named IPC channel used for handshaking between
 * the producer and a late/re-attached consumer connection
 */
char ipcChannelForHandshake[32];
/* named c2c channel */
char c2cChannel[32];
/* IPC/C2C block created for handling the late/re-attached
 * consumer connections */
NvSciStreamBlock ipcBlock;
/* Queue block needed in the C2C use case for a C2C
 * consumer's late/re-attach connections
 */
NvSciStreamBlock queue;
/* ReturnSync block needed in the C2C use case for a C2C
 * consumer's late/re-attach connections
 */
NvSciStreamBlock returnSync;
/* Indicates the connect state of IPC channel */
bool ipcConnected;
/* Indicates the connect state of C2C channel */
bool c2cConnected;
/* Indicates the Open state of IPC channel */
bool ipcOpened;
/* Indicates the Open state of C2C channel */
bool c2cOpened;
/* QNX channel ID for communication */
int32_t chid;
/* QNX channel connection ID */
int32_t coid;
} Endpoint;
extern Endpoint ipcEP[MAX_CONSUMERS];
typedef struct {
/* The application's name for the element */
uint32_t userName;
/* Attribute list for element */
NvSciBufAttrList attrList;
} ElemAttr;
/*
* Some block types that do not require direct interaction will share a
* common private data structure and event handling function.
*/
/* Common block private data */
typedef struct {
NvSciStreamBlock block;
int64_t waitTime;
char name[32];
void (*deleteFunc)(void*);
} BlockData;
typedef int32_t (*BlockFunc)(void* data, uint32_t wait);
/* Structure to track block info */
typedef struct {
NvSciStreamBlock handle;
void* data;
BlockFunc func;
NvSciEventNotifier* notifier;
bool isAlive;
bool retry;
} BlockEventData;
extern BlockEventData blocks[MAX_BLOCKS];
extern BlockEventData* blocksAlive[MAX_BLOCKS];
/* Create data structure for common blocks */
extern BlockData* createCommon(
char const* name,
size_t size);
/* Handle event for common block */
extern int32_t handleCommon(
void* data,
uint32_t wait);
/* Delete common block */
extern void deleteCommon(
void* data);
/*
* Functions for setting up each kind of block
*/
extern int32_t createIpcDst(
NvSciStreamBlock* ipcDst,
const char* channel,
bool useExternalEventService);
extern int32_t createIpcSrc(
NvSciStreamBlock* ipcSrc,
const char* channel,
bool useExternalEventService);
extern int32_t createIpcSrc2(
NvSciStreamBlock* ipcsrc,
NvSciIpcEndpoint endpoint,
bool useExtEventService);
extern int32_t createC2cSrc2(
NvSciStreamBlock* c2cSrc,
NvSciIpcEndpoint endpoint,
NvSciStreamBlock queue);
extern int32_t createC2cDst(
NvSciStreamBlock* c2cDst,
const char* channel,
NvSciStreamBlock pool);
extern int32_t createC2cSrc(
NvSciStreamBlock* c2cSrc,
const char* channel,
NvSciStreamBlock queue);
extern int32_t createLimiter(
NvSciStreamBlock* limiter,
uint32_t limit);
extern int32_t createPresentSync(
NvSciStreamBlock* presentSync);
extern int32_t createReturnSync(
NvSciStreamBlock* returnSync);
extern int32_t createMulticast(
NvSciStreamBlock* multicast,
uint32_t numConsumer);
extern int32_t createPool_Common(
NvSciStreamBlock* pool,
uint32_t numPacket,
bool isC2cPool);
extern int32_t createQueue(
NvSciStreamBlock* queue,
uint32_t useMailbox);
extern int32_t (*createProducer)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool,
uint32_t numFrames);
extern int32_t (*createConsumer)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index,
uint32_t frames);
extern int32_t (createProducer_Usecase1)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool,
uint32_t numFrames);
extern int32_t (createConsumer_Usecase1)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index,
uint32_t frames);
extern int32_t (createProducer_Usecase2)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool,
uint32_t numFrames);
extern int32_t (createConsumer_Usecase2)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index,
uint32_t frames);
extern int32_t(createProducer_Usecase3)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool,
uint32_t numFrames);
extern int32_t(createConsumer_Usecase3)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index,
uint32_t frames);
extern int32_t createPool_Usecase3(
NvSciStreamBlock* pool,
uint32_t numPacket,
bool isC2cPool);
extern void* handleLateConsumerThreadFunc(void*);
#endif // _BLOCK_INFO_H

event_sample_app/block_ipc.c

@@ -0,0 +1,199 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - ipc blocks
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvsciipc.h"
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Ipc blocks do not require any block-specific interactions, so we
 * use the set of common functions to handle their events. However,
 * they do have an additional data field which needs to be cleaned
 * up when the block is destroyed, so we use more than the common
 * data structure and delete function.
*/
/* Create and register a new ipcsrc block */
int32_t createIpcSrc(
NvSciStreamBlock* ipcsrc,
const char* channel,
bool useExtEventService)
{
NvSciError err;
uint32_t i;
uint32_t slot = 0;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("IpcSrc", 0);
if (NULL == blockData) {
return 0;
}
for (i=0; i< MAX_CONSUMERS; i++) {
if (!strcmp(ipcEP[i].ipcChannel, channel)) {
slot = i;
break;
}
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEP[slot].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for IpcSrc\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEP[slot].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a ipcsrc block */
err = NvSciStreamIpcSrcCreate(ipcEP[slot].ipcEndpoint,
sciSyncModule,
sciBufModule,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create IpcSrc block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
/* Configure block to use the external event service for
 * internal I/O messages */
if (useExtEventService && !eventFuncs->regInt(blockData->block)) {
deleteCommon(blockData);
return 0;
}
*ipcsrc = blockData->block;
ipcEP[slot].ipcBlock = blockData->block;
ipcEP[slot].ipcOpened = true;
ipcEP[slot].ipcConnected = true;
return 1;
}
/* Create and register a new ipcsrc block using an already-opened endpoint */
int32_t createIpcSrc2(
NvSciStreamBlock* ipcsrc,
NvSciIpcEndpoint endpoint,
bool useExtEventService)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("IpcSrc", 0);
if (NULL == blockData) {
return 0;
}
/* Create a ipcsrc block */
err = NvSciStreamIpcSrcCreate(endpoint,
sciSyncModule,
sciBufModule,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create IpcSrc block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
/* Configure block to use the external event service for
 * internal I/O messages */
if (useExtEventService && !eventFuncs->regInt(blockData->block)) {
deleteCommon(blockData);
return 0;
}
*ipcsrc = blockData->block;
return 1;
}
/* Create and register a new ipcdst block */
int32_t createIpcDst(
NvSciStreamBlock* ipcdst,
const char* channel,
bool useExtEventService)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("IpcDst", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEP[0].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for IpcDst\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEP[0].ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a ipcdst block */
err = NvSciStreamIpcDstCreate(ipcEP[0].ipcEndpoint,
sciSyncModule,
sciBufModule,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create IpcDst block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
/* Configure block to use the external event service for
 * internal I/O messages */
if (useExtEventService && !eventFuncs->regInt(blockData->block)) {
deleteCommon(blockData);
return 0;
}
*ipcdst = blockData->block;
return 1;
}

event_sample_app/block_limiter.c

@@ -0,0 +1,56 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - limiter block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Limiter blocks do not require any block-specific interactions, so we
 * use the set of common functions to handle their events.
*/
/* Create and register a new limiter block */
int32_t createLimiter(
NvSciStreamBlock* limiter,
uint32_t limit)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("Limiter", 0);
if (NULL == blockData) {
return 0;
}
/* Create a limiter block */
NvSciError err =
NvSciStreamLimiterCreate(limit, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create limiter block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*limiter = blockData->block;
return 1;
}

event_sample_app/block_multicast.c

@@ -0,0 +1,57 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - multicast block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Multicast blocks do not require any block-specific interactions, so we
 * use the set of common functions to handle their events.
*/
/* Create and register a new multicast block */
int32_t createMulticast(
NvSciStreamBlock* multicast,
uint32_t numConsumer)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("Multicast", 0);
if (NULL == blockData) {
return 0;
}
/* Create a multicast block */
NvSciError err =
NvSciStreamMulticastCreate(numConsumer, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create limiter block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*multicast = blockData->block;
return 1;
}

event_sample_app/block_pool.c

@@ -0,0 +1,743 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - pool block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Internal data used by the pool block */
typedef struct {
BlockData common;
bool isC2cPool;
uint32_t numConsumers;
uint32_t numProdElem;
uint32_t numConsElem;
bool elementsDone;
ElemAttr prodElem[MAX_ELEMS];
ElemAttr consElem[MAX_ELEMS];
uint32_t numPacket;
uint32_t numPacketReady;
bool packetsDone;
NvSciStreamPacket packet[MAX_PACKETS];
} PoolData;
/* Free up pool block resources */
static void deletePool(
PoolData* poolData)
{
/* Destroy block */
if (poolData->common.block != 0) {
(void)NvSciStreamBlockDelete(poolData->common.block);
}
/* Free data */
free(poolData);
}
/* Handle query of basic stream info */
static int32_t handleStreamInit(
PoolData* poolData)
{
/* Query number of consumers */
NvSciError err =
NvSciStreamBlockConsumerCountGet(poolData->common.block,
&poolData->numConsumers);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query number of consumers", err);
return 0;
}
/* Query producer and consumer(s) endpoint info if needed */
return 1;
}
/*
* If ready, handle setup of the pool buffers.
*
* Most of the work the pool application has to do resides in this function.
*/
static int32_t handlePoolBufferSetup(
PoolData* poolData)
{
NvSciError err;
/* Query producer element count */
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Producer,
&poolData->numProdElem);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query producer element count\n", err);
return 0;
}
/* Query consumer element count */
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Consumer,
&poolData->numConsElem);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query consumer element count\n", err);
return 0;
}
/* Query all producer elements */
for (uint32_t i=0U; i<poolData->numProdElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Producer, i,
&poolData->prodElem[i].userName,
&poolData->prodElem[i].attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query producer element %d\n", err, i);
return 0;
}
}
/* Query all consumer elements */
for (uint32_t i=0U; i<poolData->numConsElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Consumer, i,
&poolData->consElem[i].userName,
&poolData->consElem[i].attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query consumer element %d\n", err, i);
return 0;
}
}
/* Indicate that all element information has been imported */
poolData->elementsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementImport,
true);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to complete element import\n", err);
return 0;
}
/*
* Go through requested elements from producer and consumer and line
* them up. A general streaming application might not have a one to
* one correspondence, and the pool may have to decide what subset
* of elements to select based on knowledge of the data types that
* the application suite supports. This sample application is much
* simpler, but we still go through the process rather than assuming
* producer and consumer have requested the same things in the same
* order.
*/
uint32_t numElem = 0, p, c, e, i;
ElemAttr elem[MAX_ELEMS];
for (p=0; p<poolData->numProdElem; ++p) {
ElemAttr* prodElem = &poolData->prodElem[p];
for (c=0; c<poolData->numConsElem; ++c) {
ElemAttr* consElem = &poolData->consElem[c];
/* If requested element types match, combine the entries */
if (prodElem->userName == consElem->userName) {
ElemAttr* poolElem = &elem[numElem++];
poolElem->userName = prodElem->userName;
poolElem->attrList = NULL;
/* Combine and reconcile the attribute lists */
NvSciBufAttrList oldAttrList[2] = { prodElem->attrList,
consElem->attrList };
NvSciBufAttrList conflicts = NULL;
err = NvSciBufAttrListReconcile(oldAttrList, 2,
&poolElem->attrList,
&conflicts);
/* Discard any conflict list.
* (Could report its contents for additional debug info)
*/
if (NULL != conflicts) {
NvSciBufAttrListFree(conflicts);
}
/* Abort on error */
if (NvSciError_Success != err) {
printf("Failed to reconcile element %x attrs (%x)\n",
poolElem->userName, err);
return 0;
}
/* Found a match for this producer element so move on */
break;
} /* if match */
} /* for all requested consumer elements */
} /* for all requested producer elements */
/* Should be at least one element */
if (0 == numElem) {
printf("Pool didn't find any common elements\n");
return 0;
}
/* The requested attribute lists are no longer needed, so discard them */
for (p=0; p<poolData->numProdElem; ++p) {
ElemAttr* prodElem = &poolData->prodElem[p];
if (NULL != prodElem->attrList) {
NvSciBufAttrListFree(prodElem->attrList);
prodElem->attrList = NULL;
}
}
for (c=0; c<poolData->numConsElem; ++c) {
ElemAttr* consElem = &poolData->consElem[c];
if (NULL != consElem->attrList) {
NvSciBufAttrListFree(consElem->attrList);
consElem->attrList = NULL;
}
}
/* Inform the stream of the chosen elements */
for (e=0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
err = NvSciStreamBlockElementAttrSet(poolData->common.block,
poolElem->userName,
poolElem->attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to send element %d info\n", err, e);
return 0;
}
}
/* Indicate that all element information has been exported */
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementExport,
true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to complete element export\n", err);
return 0;
}
/*
* Create and send all the packets and their buffers
* Note: Packets and buffers are not guaranteed to be received by
* producer and consumer in the same order sent, nor are the
* status messages sent back guaranteed to preserve ordering.
* This is one reason why an event driven model is more robust.
*/
for (i=0; i<poolData->numPacket; ++i) {
/*
* Create a new packet
* Our pool implementation doesn't need to save any packet-specific
* data, but we do need to provide unique cookies, so we just
* use the pointer to the location we save the handle. For other
* blocks, this will be a pointer to the structure where the
* packet information is kept.
*/
NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
err = NvSciStreamPoolPacketCreate(poolData->common.block,
cookie,
&poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to create packet %d\n", err, i);
return 0;
}
/* Create buffers for the packet */
for (e=0; e<numElem; ++e) {
/* Allocate a buffer object */
NvSciBufObj obj;
err = NvSciBufObjAlloc(elem[e].attrList, &obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to allocate buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* Insert the buffer in the packet */
err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
poolData->packet[i],
e, obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to insert buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* The pool doesn't need to keep a copy of the object handle */
NvSciBufObjFree(obj);
}
/* Indicate packet setup is complete */
err = NvSciStreamPoolPacketComplete(poolData->common.block,
poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet %d setup\n",
err, i);
return 0;
}
}
/*
* Indicate that all packets have been sent.
* Note: An application could choose to wait to send this until
* the status has been received, in order to try to make any
* corrections for rejected packets.
*/
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketExport,
true);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet export\n",
err);
return 0;
}
/* Once all packets are set up, no longer need to keep the attributes */
for (e=0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
if (NULL != poolElem->attrList) {
NvSciBufAttrListFree(poolElem->attrList);
poolElem->attrList = NULL;
}
}
return 1;
}
/*
* If ready, handle setup of the C2C pool buffers.
*
* Most of the work the pool application has to do resides in this function.
*/
static int32_t handleC2cPoolBufferSetup(
PoolData* poolData)
{
NvSciError err;
/* Query allocated element count from the primary pool */
uint32_t numElem;
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Producer,
&numElem);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to query allocated element count\n",
err);
return 0;
}
/* Query all allocated elements from the primary pool */
ElemAttr elem[MAX_ELEMS];
for (uint32_t i = 0U; i<numElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Producer, i,
&elem[i].userName,
&elem[i].attrList);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to query allocated element %d\n",
err, i);
return 0;
}
}
/* If necessary, query the consumer elements for validation */
/* Indicate that all element information has been imported */
poolData->elementsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementImport,
true);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to complete element import\n", err);
return 0;
}
/*
* Create and send all the packets and their buffers
*/
for (uint32_t i = 0; i<poolData->numPacket; ++i) {
/*
* Create a new packet
* Our pool implementation doesn't need to save any packet-specific
* data, but we do need to provide unique cookies, so we just
* use the pointer to the location we save the handle. For other
* blocks, this will be a pointer to the structure where the
* packet information is kept.
*/
NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
err = NvSciStreamPoolPacketCreate(poolData->common.block,
cookie,
&poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to create packet %d\n", err, i);
return 0;
}
/* Create buffers for the packet */
for (uint32_t e = 0; e<numElem; ++e) {
/* Allocate a buffer object */
NvSciBufObj obj;
err = NvSciBufObjAlloc(elem[e].attrList, &obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to allocate buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* Insert the buffer in the packet */
err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
poolData->packet[i],
e, obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to insert buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* The pool doesn't need to keep a copy of the object handle */
NvSciBufObjFree(obj);
}
/* Indicate packet setup is complete */
err = NvSciStreamPoolPacketComplete(poolData->common.block,
poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet %d setup\n",
err, i);
return 0;
}
}
/*
* Indicate that all packets have been sent.
* Note: An application could choose to wait to send this until
* the status has been received, in order to try to make any
* corrections for rejected packets.
*/
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketExport,
true);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet export\n",
err);
return 0;
}
/* Once all packets are set up, no longer need to keep the attributes */
for (uint32_t e = 0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
if (NULL != poolElem->attrList) {
NvSciBufAttrListFree(poolElem->attrList);
poolElem->attrList = NULL;
}
}
return 1;
}
/* Check packet status */
static int32_t handlePacketsStatus(
PoolData* poolData)
{
bool packetFailure = false;
NvSciError err;
/* Check each packet */
for (uint32_t p = 0; p < poolData->numPacket; ++p) {
/* Check packet acceptance */
bool accept;
err = NvSciStreamPoolPacketStatusAcceptGet(poolData->common.block,
poolData->packet[p],
&accept);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's acceptance-statue\n",
err, p);
return 0;
}
if (accept) {
continue;
}
/* On rejection, query and report details */
packetFailure = true;
NvSciError status;
/* Check packet status from producer */
err = NvSciStreamPoolPacketStatusValueGet(
poolData->common.block,
poolData->packet[p],
NvSciStreamBlockType_Producer, 0U,
&status);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's statue from producer\n",
err, p);
return 0;
}
if (status != NvSciError_Success) {
printf("Producer rejected packet %d with error %x\n", p, status);
}
/* Check packet status from consumers */
for (uint32_t c = 0; c < poolData->numConsumers; ++c) {
err = NvSciStreamPoolPacketStatusValueGet(
poolData->common.block,
poolData->packet[p],
NvSciStreamBlockType_Consumer, c,
&status);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's statue from consumer %d\n",
err, p, c);
return 0;
}
if (status != NvSciError_Success) {
printf("Consumer %d rejected packet %d with error %x\n",
c, p, status);
}
}
}
/* Indicate that status for all packets has been received. */
poolData->packetsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketImport,
true);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to complete packets import\n", err);
return 0;
}
return packetFailure ? 0 : 1;
}
/* Handle events on a pool block
*
* The pool block coordinates allocation of packets based on producer
* and consumer requirements during setup. After that, no further
* events should be received until the stream is torn down.
*/
static int32_t handlePool(
void* data,
uint32_t wait)
{
/* Cast to pool data */
PoolData* poolData = (PoolData*)data;
/* Get time to wait */
int64_t waitTime = wait ? poolData->common.waitTime : 0;
/* Query/wait for an event on the block */
NvSciStreamEventType event;
NvSciError err;
err = NvSciStreamBlockEventQuery(poolData->common.block, waitTime, &event);
/* Handle errors */
if (NvSciError_Success != err) {
/* If not asked to wait, a timeout is not an error */
if (!waitTime && (NvSciError_Timeout == err)) {
return 0;
}
/* Otherwise, any error is considered fatal. A timeout probably
* indicates a failure to connect and complete setup in a timely
* fashion, so we specifically call out this case.
*/
if (NvSciError_Timeout == err) {
printf("Pool timed out waiting for setup instructions\n");
} else {
printf("Pool event query failed with error %x\n", err);
}
deletePool(poolData);
return -1;
}
/* If we received an event, handle it based on its type
*
* Note that there's a lot of error checking we could choose to do for
* some of these events, like making sure that we only receive each
* event once for a given entry. But NvSciStream is expected to take
* care of all of that, even when the application makes a mistake.
* So we only check for things that don't trigger NvSciStream errors.
*/
int32_t rv = 1;
NvSciError status;
switch (event) {
/*
* Any event we don't explicitly handle is a fatal error
*/
default:
printf("Pool received unknown event %x\n", event);
rv = -1;
break;
/*
* Error events should never occur with safety-certified drivers,
* and are provided only in non-safety builds for debugging
* purposes. Even then, they should only occur when something
* fundamental goes wrong, like the system running out of memory,
* or stack/heap corruption, or a bug in NvSci which should be
* reported to NVIDIA.
*/
case NvSciStreamEventType_Error:
err = NvSciStreamBlockErrorGet(poolData->common.block, &status);
if (NvSciError_Success != err) {
printf("%s Failed to query the error event code %x\n",
poolData->common.name, err);
rv = -1;
} else {
if ((opts.numLateConsumer > 0U) &&
(status == NvSciError_StreamNotConnected)) {
printf("[WARN] %s received error event: %x\n",
poolData->common.name, status);
rv = 2;
} else {
printf("%s received error event: %x\n",
poolData->common.name, status);
rv = -1;
}
}
break;
/*
* If told to disconnect, it means either the stream finished its
* business or some other block had a failure. We'll just do a
* clean up and return without an error. But if it happened before
* all the pool setup operations finished, we'll report it for
* debugging purposes.
*/
case NvSciStreamEventType_Disconnected:
if (!poolData->elementsDone) {
printf("Warning: Pool disconnect before element support\n");
} else if (!poolData->packetsDone) {
printf("Warning: Pool disconnect before packet setup\n");
}
rv = 2;
break;
/*
* The pool doesn't have to do anything immediately on connection, but
* now that the stream is complete we can reduce the timeout to wait
* for the producer and consumer events to arrive.
*/
case NvSciStreamEventType_Connected:
/* Initialize basic stream info */
if (!handleStreamInit(poolData)) {
rv = -1;
}
poolData->common.waitTime = 10 * 1000000;
break;
/* Process all element support from producer and consumer(s) */
case NvSciStreamEventType_Elements:
if (poolData->isC2cPool) {
if (!handleC2cPoolBufferSetup(poolData)) {
rv = -1;
}
} else {
if (!handlePoolBufferSetup(poolData)) {
rv = -1;
}
}
break;
/*
* Check packet/buffer status returned from producer/consumer
* A more sophisticated application might have the means to recover
* from any failures. But in general we expect that in a production
* application, any failures are due to something fundamental going
* wrong like lack of memory/resources, which hopefully has been
* designed out. So these status checks are more useful during
* development, where we just report the issue for debugging purposes.
*
* Once all the status events have been received for all packets
* and buffers, the pool should require no further interaction
* until the time comes to shut down the application. We set the
* wait time to infinite.
*/
case NvSciStreamEventType_PacketStatus:
/* There are multiple ways the status handling could be organized.
* In particular, waiting for status could be interleaved with
* sending the packets. This example waits for status from all
* packets before checking each packet's status.
*/
if (++poolData->numPacketReady < poolData->numPacket) {
break;
}
if (!handlePacketsStatus(poolData)) {
rv = -1;
}
poolData->common.waitTime = -1;
break;
/* All setup complete. Transition to runtime phase */
case NvSciStreamEventType_SetupComplete:
break;
}
/* On failure or final event, clean up the block */
if ((rv < 0) || (1 < rv)) {
deletePool(poolData);
}
return rv;
}
/* Create and register a new pool block */
int32_t createPool_Common(
NvSciStreamBlock* pool,
uint32_t numPacket,
bool isC2cPool)
{
/* Create a data structure to track the block's status */
PoolData* poolData = (PoolData*)calloc(1, sizeof(PoolData));
if (NULL == poolData) {
printf("Failed to allocate data structure for pool\n");
return 0;
}
/* Save the name for debugging purposes */
strcpy(poolData->common.name, "Pool");
/* Save the c2c pool flag */
poolData->isC2cPool = isC2cPool;
/* Save the packet count */
poolData->numPacket = numPacket;
/* Wait time for initial connection event will be 60 seconds */
poolData->common.waitTime = 60 * 1000000;
/* Create a pool block */
NvSciError err =
NvSciStreamStaticPoolCreate(poolData->numPacket,
&poolData->common.block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create pool block\n", err);
deletePool(poolData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(poolData->common.block, poolData, handlePool)) {
deletePool(poolData);
return 0;
}
*pool = poolData->common.block;
return 1;
}
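/*
 * Usage sketch (hypothetical packet count): the C2C variant of a use case
 * might create its secondary pool like this and then wire the returned
 * block into the stream.
 */
#if 0
NvSciStreamBlock c2cPool;
if (!createPool_Common(&c2cPool, 3U, true)) {
    /* handle failure */
}
#endif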


@@ -0,0 +1,55 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - PresentSync block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* PresentSync blocks do not require any block-specific interactions so we
* use the set of common functions to handle its events.
*/
/* Create and register a new presentSync block */
int32_t createPresentSync(
NvSciStreamBlock* presentSync)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("PresentSync", 0);
if (NULL == blockData) {
return 0;
}
/* Create a PresentSync block */
NvSciError err =
NvSciStreamPresentSyncCreate(sciSyncModule, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create PresentSync block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*presentSync = blockData->block;
return 1;
}


File diff suppressed because it is too large


@@ -0,0 +1,57 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - queue block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Queue blocks do not require any block-specific interactions so we
* use the set of common functions to handle its events.
*/
/* Create and register a new queue block */
int32_t createQueue(
NvSciStreamBlock* queue,
uint32_t useMailbox)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon(useMailbox ? "Mailbox" : "FIFO", 0);
if (NULL == blockData) {
return 0;
}
/* Create a queue block */
NvSciError err = useMailbox
? NvSciStreamMailboxQueueCreate(&blockData->block)
: NvSciStreamFifoQueueCreate(&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create queue block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*queue = blockData->block;
return 1;
}


@@ -0,0 +1,55 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - ReturnSync block
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* ReturnSync blocks do not require any block-specific interactions so we
* use the set of common functions to handle its events.
*/
/* Create and register a new returnSync block */
int32_t createReturnSync(
NvSciStreamBlock* returnSync)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("ReturnSync", 0);
if (NULL == blockData) {
return 0;
}
/* Create a ReturnSync block */
NvSciError err =
NvSciStreamReturnSyncCreate(sciSyncModule, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create ReturnSync block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*returnSync = blockData->block;
return 1;
}


@@ -0,0 +1,51 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - event handler abstraction
*/
#ifndef _EVENT_LOOP_H
#define _EVENT_LOOP_H 1
#include <stdint.h>
#include "nvscistream.h"
/*
* Block event handler function.
* Input:
* data: The block's type-specific private data structure
* wait: Flag indicating whether to wait for an event
* Returns:
* +2: Block has completed its last operation and will be destroyed
* +1: An event was found and processed
* 0: No event was found (not an error)
* -1: Block has encountered a fatal error and will be destroyed
*/
typedef int32_t (*BlockFunc)(void* data, uint32_t wait);
/* Table of events to abstract the two approaches for event loops */
typedef struct {
int32_t (*init)(void);
int32_t (*reg)(NvSciStreamBlock, void*, BlockFunc);
int32_t (*regInt)(NvSciStreamBlock);
int32_t (*loop)(void);
} EventFuncs;
/* Chosen event function table */
extern EventFuncs const* eventFuncs;
/* Event tables for the two methods */
extern EventFuncs const eventFuncs_Service;
extern EventFuncs const eventFuncs_Threads;
#endif // _EVENT_LOOP_H
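/*
 * Minimal sketch of the handler contract documented above (hypothetical
 * block type, not part of the sample): query one event on the block and
 * map the outcome onto the +2/+1/0/-1 return codes of BlockFunc.
 * ExampleData and its fields are assumptions for illustration.
 */
#if 0
static int32_t handleExample(void* data, uint32_t wait)
{
    ExampleData* d = (ExampleData*)data;  /* hypothetical type */
    int64_t waitTime = wait ? d->waitTime : 0;
    NvSciStreamEventType event;
    NvSciError err = NvSciStreamBlockEventQuery(d->block, waitTime, &event);
    if (NvSciError_Timeout == err) {
        return 0;   /* no event found (not an error) */
    }
    if (NvSciError_Success != err) {
        return -1;  /* fatal error; block will be destroyed */
    }
    return (NvSciStreamEventType_Disconnected == event) ? 2 : 1;
}
#endif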


@@ -0,0 +1,339 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - service-based event handling
*
* This file implements the option to handle events for all blocks
* through an event service. Each block adds an event notifier to
* a list. That notifier will be signaled when an event is ready
* on the block. A single main loop waits for one or more of the
* notifiers to trigger, processes events on the corresponding
* blocks, and goes back to waiting. When all blocks have been
* destroyed either due to failure or all payloads being processed,
* the loop exits and the function returns.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#if (QNX == 1)
#include <sys/neutrino.h>
#endif
#include "nvscievent.h"
#include "block_info.h"
#include "event_loop.h"
/* Event service */
static NvSciEventLoopService* service = NULL;
/* List of blocks */
#define MAX_INTERNAL_NOTIFIERS 10
#define MAX_NOTIFIERS (MAX_BLOCKS + MAX_INTERNAL_NOTIFIERS)
int32_t numBlocks = 0U;
uint32_t numAlive = 0U;
static uint32_t numIntNotifiers = 0U;
static int32_t numNotifiers = 0U;
BlockEventData blocks[MAX_BLOCKS];
BlockEventData* blocksAlive[MAX_BLOCKS];
static NvSciEventNotifier* intNotifiers[MAX_INTERNAL_NOTIFIERS];
static uint32_t success = 1U;
/* Initialize service-based event handling */
static int32_t eventServiceInit(void)
{
/*
* The OS configuration should be NULL for Linux and should
* have a valid configuration for QNX.
* See NvSciEventLoopServiceCreateSafe API Specification for more
* information.
*/
void *osConfig = NULL;
#if (QNX == 1)
struct nto_channel_config config = {0};
/*
* The number of pulses can be calculated from the number of notifiers
* bound to the event service, the number of packets, and the number of
* events handled by each block:
* (num_of_pulses = num_of_notifiers * 4 + \
* (num_packets + 5) * num_of_endpoints)
* If you experience pulse pool shortages in normal operation, increase
* the number of pulses.
* If there are no available pulses in the pool, SIGKILL is delivered
* by default. You may configure the sigevent that you want to be
* delivered when a pulse can't be obtained from the pool.
*
* See NvSciEventLoopServiceCreateSafe API Specification for more
* information.
*/
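/*
 * Worked example (hypothetical stream shape): with 6 notifiers, 3 packets
 * and 2 IPC endpoints, the formula above gives
 *     6 * 4 + (3 + 5) * 2 = 40
 * pulses, so the 1024 configured below leaves generous headroom.
 */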
/* The num_pulses set below is just an example number and should be
* adjusted depending on the use case.
*/
config.num_pulses = 1024U;
config.rearm_threshold = 0;
osConfig = &config;
#endif
/* Create event loop service */
NvSciError err = NvSciEventLoopServiceCreateSafe(1U, osConfig, &service);
if (NvSciError_Success != err) {
printf("Failed (%x) to create event service\n", err);
return 0;
}
return 1;
}
/* Register a new block with the event management */
static int32_t eventServiceRegister(
NvSciStreamBlock blockHandle,
void* blockData,
BlockFunc blockFunc)
{
/* Sanity check to make sure we left room for enough blocks */
if (numBlocks >= MAX_BLOCKS) {
printf("Exceeded maximum number of blocks\n");
return 0;
}
/* Grab the next entry in the list for the new block and fill it in */
BlockEventData* entry = &blocks[numBlocks++];
entry->handle = blockHandle;
entry->data = blockData;
entry->func = blockFunc;
entry->isAlive = true;
entry->retry = false;
/* Create a notifier for events on this block */
NvSciError err =
NvSciStreamBlockEventServiceSetup(entry->handle,
&service->EventService,
&entry->notifier);
if (NvSciError_Success != err ) {
printf("Failed (%x) to create event notifier for block\n", err);
return 0;
}
return 1;
}
/* Register a new block with the event management to handle internal event.
*
* It's only supported on IpcSrc/IpcDst blocks now.
*
* Without user-provided event service, each IpcSrc/IpcDst block creates
* an internal event service and spawns a dispatch thread to handle the
* internal I/O messages.
*
* With the user-provided event service, no internal thread will be created.
* The application needs to wait for events on these internal notifiers.
* When there's a new notification on the internal notifiers, it will
* trigger the NvSciStream callback function automatically.
*
* The application can bind the internal notifiers and the external
* notifiers, which is used to monitor the NvSciStreamEvent on the block,
* to the same event service or different ones. In this sample app, we
* bind them to the same event service and use one thread to handle all
* the events.
*/
static int32_t eventServiceInternalRegister(
NvSciStreamBlock blockHandle)
{
/* Get notifiers for internal events on this block */
numIntNotifiers = MAX_INTERNAL_NOTIFIERS;
NvSciError err =
NvSciStreamBlockInternalEventServiceSetup(
blockHandle,
&service->EventService,
&numIntNotifiers,
intNotifiers);
if (NvSciError_Success != err) {
printf("Failed (%x) to setup internal event service for block\n", err);
return 0;
}
/* Sanity check to make sure we left room for enough internal notifiers */
if (numIntNotifiers >= MAX_INTERNAL_NOTIFIERS) {
printf("Exceeded maximum number of internal notifiers\n");
return 0;
}
return 1;
}
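/*
 * Usage sketch: an application that creates IpcSrc/IpcDst blocks with this
 * event service would register each one through the table entry below,
 * e.g. (block variable hypothetical):
 *
 *     eventFuncs->regInt(ipcSrcBlock);
 */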
/* Main service-based event loop */
static int32_t eventServiceLoop(void)
{
int32_t i;
int32_t k;
/*
* Notes on handling notifications:
* If more than one signal occurs on a notifier in between calls
* to check for events, then NvSciEvent will squash the notifications,
* so only one is received. This means the application must drain
* all pending events on a block after its notifier signals. It won't
* receive new notifications for those pending events.
* A simple implementation might process each block's events in a loop
* until there are no more, and then move on to the next block. But
* this poses a risk of starvation. Consider the case of a stream in
* mailbox mode, where the mailbox already has a waiting payload.
* If the producer receives a PacketReady event, it will obtain
* the packet, fill it with data, and present it to the stream.
* Because the mailbox is full, the packet will immediately be
* returned, resulting in a new PacketReady event. The application
* can go into an infinite loop, generating new payloads on the
* producer without giving the consumer a chance to process them.
* We therefore use an event loop that only processes one event
* per block for each iteration, but keeps track of whether there
* was an event on a block for the previous pass, and if so
* retries it even if no new signal occurred. The event loop
* waits for events only when there was no prior event. Otherwise
* it only polls for new ones.
* For internal notifiers, the handler is registered by NvSciStream
* when the notifiers are created, so it is triggered automatically
* when a new event arrives. The application only needs to wait for
* these events; it does not have to handle them itself.
*/
/* Pack all notifiers into an array */
NvSciEventNotifier* notifiers[MAX_NOTIFIERS];
/* Initialize loop control parameters */
int64_t timeout = 1000000;
bool event[MAX_NOTIFIERS];
uint32_t numAliveBlocks;
numAlive = numBlocks;
/* Main loop - Handle events until all blocks report completion or fail */
while (numAlive && !atomic_load(&streamDone)) {
numNotifiers = 0;
numAliveBlocks = 0;
/* Acquire the lock */
pthread_mutex_lock(&mutex);
/* Pack the external notifiers for the block */
for (i=0; i<numBlocks; ++i) {
if (blocks[i].isAlive) {
blocksAlive[numAliveBlocks] = &blocks[i];
notifiers[numAliveBlocks] = blocks[i].notifier;
numAliveBlocks++;
}
}
k = numAliveBlocks;
/* Pack the internal notifiers */
for (uint32_t j = 0; j < numIntNotifiers; ++j,++k) {
notifiers[k] = intNotifiers[j];
}
numNotifiers = numAliveBlocks + numIntNotifiers;
/* Release the lock */
pthread_mutex_unlock(&mutex);
/* Wait/poll for events, depending on current timeout */
memset(event, 0, sizeof(event));
NvSciError err = service->WaitForMultipleEventsExt(
&service->EventService,
notifiers,
numNotifiers,
timeout,
event);
if ((NvSciError_Success != err) && (NvSciError_Timeout != err)) {
printf("Failure (%x) while waiting/polling event service\n", err);
return 0;
}
/* Reset the timeout for the next pass; if a block handles an event below, we switch to polling (timeout 0) so it can be retried */
timeout = 1000000;
/*
* Check for events on new blocks that signaled or old blocks that
* had an event on the previous pass. This is done in reverse
* of the order in which blocks were registered. This is because
* producers are created before consumers, and for mailbox mode
* we want to give the consumer a chance to use payloads before
* the producer replaces them.
*/
for (i=numAliveBlocks-1; ((i>=0) && (!atomic_load(&streamDone))); --i) {
/* Get block info */
BlockEventData* entry = blocksAlive[i];
if (entry != NULL) {
if (event[i] || entry->retry) {
/* Reset to no retry for next pass */
entry->retry = false;
/* Skip if this block is no longer in use */
if (entry->data) {
/* Call the block's event handler function */
int32_t rv = entry->func(entry->data, 0);
if (rv < 0) {
/* On failure, no longer check block and app failed */
success = 0U;
entry->data = NULL;
numAlive--;
} else if (rv == 2) {
/* On completion, no longer check block */
entry->isAlive = false;
entry->data = NULL;
numAlive--;
} else if (rv == 1) {
/* If event found, retry next loop */
timeout = 0;
entry->retry = true;
}
}
}
}
}
}
/* Delete internal notifiers */
for (uint32_t j=0; j<numIntNotifiers; ++j) {
intNotifiers[j]->Delete(intNotifiers[j]);
}
/* Delete notifiers */
for (i=0; i<numBlocks; ++i) {
blocks[i].notifier->Delete(blocks[i].notifier);
}
/* Delete service */
service->EventService.Delete(&service->EventService);
return success;
}
/* Table of functions for service-based event handling */
EventFuncs const eventFuncs_Service = {
.init = eventServiceInit,
.reg = eventServiceRegister,
.regInt = eventServiceInternalRegister,
.loop = eventServiceLoop
};


@@ -0,0 +1,129 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - thread-based event handling
*
* This file implements the option to handle events for each block in
* a separate thread. Each thread just has a loop that waits for a
* block event to occur and handles it appropriately, until the block
* has performed all required actions or receives notification that
* the stream has disconnected.
*
* In practice, only a few block types (producer, consumer, and pool)
* receive any events that need to be handled. So a more streamlined
* application might choose to only monitor them, assuming that the
* other blocks can be left alone until the time comes to tear them
* down.
*
* Note: We use standard pthread functions here because it allows this
* sample to run on all operating systems. QNX has its own thread
* management functions which might be more efficient when using
* this approach.
*/
#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include "event_loop.h"
/* Structure to track block info */
typedef struct {
NvSciStreamBlock handle;
void* data;
BlockFunc func;
pthread_t thread;
} BlockEventData;
/* List of blocks */
#define MAX_BLOCKS 100U
static uint32_t numBlocks = 0U;
static BlockEventData blocks[MAX_BLOCKS];
static uint32_t success = 1U;
/* The per-thread loop function for each block */
static void* eventThreadFunc(void* arg)
{
/* Simple loop, waiting for events on the block until the block is done */
BlockEventData* entry = (BlockEventData*)arg;
while (1) {
int32_t rv = entry->func(entry->data, 1);
if (rv < 0) {
success = 0U;
break;
} else if (rv == 2) {
break;
}
}
return NULL;
}
/* Initialize per-thread event handling */
static int32_t eventThreadInit(void)
{
/* No special initialization required for this method */
return 1;
}
/* Register a new block with the event management */
static int32_t eventThreadRegister(
NvSciStreamBlock blockHandle,
void* blockData,
BlockFunc blockFunc)
{
/* Sanity check to make sure we left room for enough blocks */
if (numBlocks >= MAX_BLOCKS) {
printf("Exceeded maximum number of blocks\n");
return 0;
}
/* Grab the next entry in the list for the new block and fill it in */
BlockEventData* entry = &blocks[numBlocks++];
entry->handle = blockHandle;
entry->data = blockData;
entry->func = blockFunc;
/* Spawn a thread */
int32_t rv = pthread_create(&entry->thread,
NULL,
eventThreadFunc,
(void*)entry);
if (rv != 0) {
printf("Failed to spawn thread to monitor block\n");
return 0;
}
return 1;
}
/* Main per-thread event loop */
static int32_t eventThreadLoop(void)
{
/*
* Each block has its own thread loop. This main function just needs
* to wait for all of them to exit, and then return any error. This
* waiting can be done in any order.
*/
for (uint32_t i=0; i<numBlocks; ++i) {
(void)pthread_join(blocks[i].thread, NULL);
}
return success;
}
/* Table of functions for per-thread event handling */
EventFuncs const eventFuncs_Threads = {
.init = eventThreadInit,
.reg = eventThreadRegister,
.regInt = NULL,
.loop = eventThreadLoop
};
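/*
 * Hedged sketch of how a main() might select and drive one of the two
 * event tables declared in event_loop.h; the real main.c diff is
 * suppressed below, and the argument handling here is hypothetical.
 */
#if 0
static int32_t runStream(int useThreads)
{
    eventFuncs = useThreads ? &eventFuncs_Threads : &eventFuncs_Service;
    if (!eventFuncs->init()) {
        return 0;
    }
    /* ... create producer/pool/queue/consumer blocks here; each
     * create*() call registers its block via eventFuncs->reg() ... */
    return eventFuncs->loop();
}
#endif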

event_sample_app/main.c (1287 lines, new file)

File diff suppressed because it is too large


@@ -0,0 +1,55 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - usecase #1
*
* This use case consists of CUDA producer and CUDA consumer(s).
* It makes use of the CUDA runtime toolkit.
*
* There are two packet elements, a large data buffer and a small
* buffer containing a CRC checksum value.
*
* The producer operation is very simple. It takes a local source buffer
* filled with simple data, and issues a CUDA command to asynchronously
* copy it to the packet's data buffer. It generates a checksum from the
* source buffer and puts that in the packet's CRC buffer.
*
* The consumer(s) similarly issues a CUDA command to copy the packet's
* data buffer to a local buffer. When finished, it generates a checksum
* from the local copy and compares it to the value in the packet's
* CRC buffer.
*
* The data buffer is processed through the CUDA engine, with commands issued
* asynchronously. Sync objects must be used to coordinate when it is safe
* to write and read the buffer. The CRC buffer is written and read directly
* through the CPU. It uses immediate mode and it is not necessary to wait
* for the sync objects before accessing it.
*
* In addition to the normal case where producers signal sync objects that
* consumers wait for, and vice versa, this use case also needs the
* producer to be able to wait for the fences it generates, in order
* to protect its local buffer from modification while still in use.
* So this use case also provides an example of CPU waiting for fences.
*/
#ifndef _USECASE1_H
#define _USECASE1_H 1
/* Names for the packet elements */
#define ELEMENT_NAME_DATA 0xdada
#define ELEMENT_NAME_CRC 0xcc
/* Names for the endpoint info */
#define ENDINFO_NAME_PROC 0xabcd
#endif // _USECASE1_H
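/*
 * Hedged sketch of the CPU fence wait mentioned above, assuming a wait
 * context allocated elsewhere with NvSciSyncCpuWaitContextAlloc; the
 * function and variable names are hypothetical.
 */
#if 0
NvSciError waitFenceOnCpu(NvSciSyncFence* fence, NvSciSyncCpuWaitContext ctx)
{
    /* Timeout is in microseconds; -1 waits indefinitely */
    return NvSciSyncFenceWait(fence, ctx, -1);
}
#endif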

event_sample_app/util.c (66 lines, new file)

@@ -0,0 +1,66 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - utility functions
*/
#include "util.h"
#define CRC32_POLYNOMIAL 0xEDB88320L
static uint32_t calculateBufferCRC(
uint32_t count,
uint32_t crc,
uint8_t* buffer)
{
static uint32_t crcTable[256];
static int initialized = 0;
uint32_t i, j, tmp;
if (!initialized) {
for (i = 0; i <= 255; i++) {
tmp = i;
for (j = 8; j > 0; j--) {
if (tmp & 1) {
tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL;
} else {
tmp >>= 1;
}
}
crcTable[i] = tmp;
}
initialized = 1;
}
while (count-- != 0) {
tmp = (crc >> 8) & 0x00FFFFFFL;
crc = tmp ^ crcTable[((uint32_t) crc ^ *buffer++) & 0xFF];
}
return crc;
}
uint32_t generateCRC(
uint8_t *data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch)
{
uint32_t y = 0U;
uint32_t crc = 0U;
for (y = 0U; y < height; y++) {
crc = calculateBufferCRC(width, crc, data_ptr);
data_ptr += pitch;
}
return crc;
}
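/*
 * Usage sketch (hypothetical caller): a linear buffer of `size` bytes can
 * be checksummed as a single row by passing height 1 and pitch == width.
 */
#if 0
static uint32_t checksumLinear(uint8_t* data, uint32_t size)
{
    return generateCRC(data, 1U, size, size);
}
#endif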

event_sample_app/util.h (33 lines, new file)

@@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* NvSciStream Event Loop Driven Sample App - utilities
*/
#ifndef _UTIL_H
#define _UTIL_H 1
#include <stdint.h>
#if defined(__x86_64__)
#define cuDeviceGetUuid cuDeviceGetUuid_v2
#endif
/* CRC checksum generator */
extern uint32_t generateCRC(
uint8_t *data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch);
#endif // _UTIL_H

push_info.txt (1 line, new file)

@@ -0,0 +1 @@
jetson_38.2.1

rawstream/Makefile (126 lines, new file)

@@ -0,0 +1,126 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Location of common libraries
LIB_DIR = /usr/lib/aarch64-linux-gnu
# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
# Location of NVSCI header
NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
NVCCFLAGS :=
CCFLAGS :=
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
# Debug build flags
dbg = 0
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
################################################################################
# Common includes and paths
INCLUDES := -I./
LIBRARIES := -L$(LIB_DIR)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda -lrt
endif
# Includes and paths for NVSCI libraries
NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null)
NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null)
NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null)
ifeq ("$(NVSCIBUFHEADER)","")
$(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISYNCHEADER)","")
$(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIIPCHEADER)","")
$(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR))
endif
INCLUDES += -I$(NVSCI_HEADER_DIR)
LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream
ALL_CCFLAGS += --std=c++11 --threads 0
################################################################################
# Target rules
OUTPUT := rawstream
all: build
build: $(OUTPUT)
OBJ := rawstream_consumer.o
OBJ += rawstream_cuda.o
OBJ += rawstream_ipc_linux.o
OBJ += rawstream_main.o
OBJ += rawstream_producer.o
%.o: %.c
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) -o $@ -c $<
$(OUTPUT): $(OBJ)
$(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES)
run: build
$(OUTPUT)
testrun: build
clean:
rm -f $(OBJ) $(OUTPUT)
clobber: clean

rawstream/README.txt (41 lines, new file)

@@ -0,0 +1,41 @@
Rawstream Sample App - README
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
NVIDIA Corporation and its licensors retain all intellectual property and
proprietary rights in and to this software, related documentation and any
modifications thereto. Any use, reproduction, disclosure or distribution
of this software and related documentation without an express license
agreement from NVIDIA Corporation is strictly prohibited.
---
# rawstream - NvStreams Rawstream Sample App
## Description
This directory contains a raw stream sample application using NvSciBuf,
NvSciSync and NvSciIpc.
## Build the application
The rawstream sample includes source code and a Makefile.
1. On the host system, navigate to the sample application directory:
$ cd <top>/samples/nvsci/rawstream/
2. Build the sample application:
$ make clean
$ make
## Examples of how to run the sample application:
$ sudo ./rawstream -p &
$ sudo ./rawstream -c
## Examples of how to run the sample application for late attach:
$ sudo ./rawstream -p -l &
$ sudo ./rawstream -c -l

rawstream/rawstream.h (152 lines, new file)

@@ -0,0 +1,152 @@
/*
* Copyright (c) 2020-2025 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef _rawstream_h
#define _rawstream_h
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
#include <nvscisync.h>
#include <nvscibuf.h>
#include "cuda.h"
#include "cuda_runtime_api.h"
// Constants controlling configuration
#define totalFrames 32U
#define totalBuffers 4U
#if defined(__x86_64__)
#define cuDeviceGetUuid cuDeviceGetUuid_v2
#endif
// Sync/Buf modules
extern NvSciSyncModule syncModule;
extern NvSciBufModule bufModule;
// Exchanged sync info
extern NvSciSyncAttrList producerSignalAttrs;
extern NvSciSyncAttrList consumerSignalAttrs;
extern NvSciSyncAttrList producerWaitAttrs;
extern NvSciSyncAttrList consumerWaitAttrs;
extern NvSciSyncAttrList prodToConsAttrs;
extern NvSciSyncAttrList consToProdAttrs;
extern NvSciSyncObj consumerSignalObj;
extern NvSciSyncObj producerSignalObj;
extern NvSciSyncObj consumerWaitObj;
extern NvSciSyncObj producerWaitObj;
// Exchanged buf info
extern NvSciBufAttrList producerWriteAttrs;
extern NvSciBufAttrList consumerReadAttrs;
extern NvSciBufAttrList combinedBufAttrs;
// CUDA info common to producer and consumer
typedef struct {
int deviceId;
CUuuid uuid;
cudaStream_t stream;
cudaExternalSemaphore_t signalerSem;
cudaExternalSemaphore_t waiterSem;
NvSciBufType bufType;
uint64_t bufSize;
uint8_t* bufCopy;
} CudaClientInfo;
// List of buffers with status
typedef struct {
// Buffer handle
NvSciBufObj obj;
// CUDA external memory object
cudaExternalMemory_t extMem;
// Mapping into virtual memory
uint8_t* ptr;
// Current owner (0 = producer, 1 = consumer)
uint32_t owner;
// Fence to wait for
NvSciSyncFence fence;
// Checksum for error checking
uint32_t crc;
} Buffer;
extern Buffer buffers[totalBuffers];
// packet data
// Note: The checksum is not, in general, needed in a real streaming
// application. All that is required is something to identify
// the buffer and provide the fences. See comments in the producer
// and consumer for the reason for the checksum.
typedef struct {
// buffer identifier
uint32_t bufferId;
// buffer checksum
uint32_t crc;
// Fence to wait for
NvSciSyncFenceIpcExportDescriptor fenceDesc;
} Packet;
// IPC related info
typedef struct {
// NvSciIpc handle
NvSciIpcEndpoint endpoint;
// IPC channel info
struct NvSciIpcEndpointInfo info;
// QNX: Channel id to get event
int32_t chId;
// QNX: Connection id to send event in library
int32_t connId;
// Linux: IPC event fd
int32_t ipcEventFd;
} IpcWrapper;
extern IpcWrapper ipcWrapper;
// CUDA data types
typedef struct cudaExternalSemaphoreHandleDesc cudaExternalSemaphoreHandleDesc;
typedef struct cudaExternalMemoryHandleDesc cudaExternalMemoryHandleDesc;
typedef struct cudaExternalMemoryBufferDesc cudaExternalMemoryBufferDesc;
typedef struct cudaExternalSemaphoreWaitParams cudaExternalSemaphoreWaitParams;
typedef struct cudaExternalSemaphoreSignalParams cudaExternalSemaphoreSignalParams;
// Utility functions
extern uint32_t GenerateCRC(uint8_t* data, uint32_t width, uint32_t height, uint32_t pitch);
// Thread functions
extern void* producerFunc(void*);
extern void* consumerFunc(void*);
// IPC functions
extern NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper);
extern NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size);
extern NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size);
extern void ipcDeinit(IpcWrapper* ipcWrapper);
// CUDA-specific operations
extern bool setupCuda(CudaClientInfo* info);
extern bool setupCudaSync(CudaClientInfo* info,
NvSciSyncObj sciSignalObj,
NvSciSyncObj sciWaitObj);
extern bool setupCudaBufAttr(CudaClientInfo* info,
NvSciBufAttrList attrs);
extern bool setupCudaBuffer(CudaClientInfo* info,
Buffer* buf);
extern bool waitCudaFence(CudaClientInfo* info,
Buffer* buf);
extern bool signalCudaFence(CudaClientInfo* info,
Buffer* buf);
extern void deinitCuda(CudaClientInfo* info);
extern void deinitCudaBuffer(Buffer* buf, int num);
#endif // _rawstream_h
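/*
 * Hedged sketch (producer side): how a filled buffer might be handed to
 * the consumer using the Packet layout above. It mirrors the consumer
 * receive loop; `id` is hypothetical and error handling is elided.
 */
#if 0
static NvSciError examplePresent(uint32_t id)
{
    Packet pkt;
    pkt.bufferId = id;
    pkt.crc = buffers[id].crc;
    (void)NvSciSyncIpcExportFence(&buffers[id].fence,
                                  ipcWrapper.endpoint,
                                  &pkt.fenceDesc);
    buffers[id].owner = 1U;  /* consumer now owns the buffer */
    return ipcSend(&ipcWrapper, &pkt, sizeof(pkt));
}
#endif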


@@ -0,0 +1,670 @@
/*
* Copyright (c) 2020-2024 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
void* consumerFunc(void* arg)
{
CudaClientInfo cudaInfo;
NvSciError sciErr;
int cudaErr;
void* recvWaitListDesc = NULL;
void* recvObjAndListDesc = NULL;
void* recvBufListDesc = NULL;
*(int*)arg = 1;
fprintf(stderr, "Consumer starting\n");
// Do common cuda initialization
if (!setupCuda(&cudaInfo)) {
goto done;
}
// Create an empty sync attribute list for signaling permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &consumerSignalAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer signal attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to signal syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerSignalAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrSignal);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query signal attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Consumer signal attributes established\n");
// Create an empty sync attribute list for waiting permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &consumerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to wait for syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerWaitAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrWait);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query wait attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Consumer wait attributes established\n");
// Export consumer's wait attributes to a form suitable for IPC
size_t sendWaitAttrListSize = 0U;
void* sendWaitListDesc = NULL;
sciErr = NvSciSyncAttrListIpcExportUnreconciled(&consumerWaitAttrs,
1,
ipcWrapper.endpoint,
&sendWaitListDesc,
&sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Send the size of the consumer's wait attributes to the producer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendWaitAttrListSize,
sizeof(sendWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send consumer wait attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the consumer's wait attributes
sciErr = ipcSend(&ipcWrapper,
sendWaitListDesc,
sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send consumer wait attrs (%x)\n", sciErr);
goto done;
}
// Wait to receive the size of the producer's wait attributes
size_t recvWaitAttrListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvWaitAttrListSize,
sizeof(recvWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv producer wait attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the producer's wait attributes
recvWaitListDesc = malloc(recvWaitAttrListSize);
if (recvWaitListDesc == NULL) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync attr allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive producer's wait attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvWaitListDesc,
recvWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Convert the received producer wait attributes to an attribute list
sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule,
ipcWrapper.endpoint,
recvWaitListDesc,
recvWaitAttrListSize,
&producerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for consumer to producer signaling
NvSciSyncAttrList syncAllAttrs[2], syncConflictAttrs;
syncAllAttrs[0] = consumerSignalAttrs;
syncAllAttrs[1] = producerWaitAttrs;
sciErr = NvSciSyncAttrListReconcile(syncAllAttrs, 2,
&consToProdAttrs, &syncConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't merge consumer->producer attrs (%x)\n",
sciErr);
goto done;
}
// Allocate consumer to producer sync object
sciErr = NvSciSyncObjAlloc(consToProdAttrs, &consumerSignalObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't allocate consumer->producer sync (%x)\n",
sciErr);
goto done;
}
// Export sync attributes and object to a form suitable for IPC
void* sendObjAndListDesc = NULL;
size_t sendObjAndListSize = 0U;
sciErr = NvSciSyncIpcExportAttrListAndObj(consumerSignalObj,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&sendObjAndListDesc,
&sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Send the size of the sync description to the producer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send consumer->producer sync description size(%x)\n",
sciErr);
goto done;
}
// Send the sync description to the producer
sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the producer->consumer sync desription
size_t recvObjAndListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvObjAndListSize,
sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't recv producer->consumer sync description size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the description
recvObjAndListDesc = malloc(recvObjAndListSize);
if (NULL == recvObjAndListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Sync description allocation failed (%x)\n", sciErr);
goto done;
}
// Wait to receive producer->consumer sync description
sciErr = ipcRecvFill(&ipcWrapper,
recvObjAndListDesc,
recvObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't receive producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Convert the received producer->consumer sync description to a
// sync attribute list and object
sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
ipcWrapper.endpoint,
recvObjAndListDesc,
recvObjAndListSize,
&consumerWaitAttrs,
1,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&consumerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't import producer->consumer sync (%x)\n",
sciErr);
goto done;
}
// Validate imported reconciled attribute list and object
{
NvSciSyncAttrList consumerWaitList;
sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
consToProdAttrs,
NULL,
0,
NvSciSyncAccessPerm_SignalOnly);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of consToProd list failed: %x\n", sciErr);
goto done;
}
sciErr = NvSciSyncObjGetAttrList(consumerWaitObj,
&consumerWaitList);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't get the reconciled list from consumer wait object (%x)\n",
sciErr);
goto done;
}
sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
consumerWaitList,
NULL,
0,
NvSciSyncAccessPerm_WaitOnly);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of imported reconciled consumer wait list failed: %x\n",
sciErr);
goto done;
}
sciErr = NvSciSyncObjValidate(consumerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of imported consumer wait object failed: %x\n",
sciErr);
goto done;
}
}
// Set up CUDA sync objects, importing NvSciSync objects
if (!setupCudaSync(&cudaInfo, consumerSignalObj, consumerWaitObj)) {
goto done;
}
fprintf(stderr, "Consumer exchanged sync objects with producer\n");
// Create an empty buffer attribute list for consumer buffers
sciErr = NvSciBufAttrListCreate(bufModule, &consumerReadAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Fill consumer buffer attribute list with values
NvSciBufAttrKeyValuePair bufKeyValue[4];
NvSciRmGpuId gpuId;
memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
bufKeyValue[0].value = &gpuId;
bufKeyValue[0].len = sizeof(gpuId);
NvSciBufType bufType = NvSciBufType_RawBuffer;
bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
bufKeyValue[1].value = &bufType;
bufKeyValue[1].len = sizeof(bufType);
NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_Readonly;
bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
bufKeyValue[2].value = &bufPerm;
bufKeyValue[2].len = sizeof(bufPerm);
bool bufAccessFlag = true;
bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
bufKeyValue[3].value = &bufAccessFlag;
bufKeyValue[3].len = sizeof(bufAccessFlag);
sciErr = NvSciBufAttrListSetAttrs(consumerReadAttrs, bufKeyValue, 4);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to fill consumer buffer attrs (%x)\n", sciErr);
goto done;
}
fprintf(stderr, "Consumer buffer attributes established\n");
// Export consumer buffer attributes in a form suitable for IPC
size_t consumerReadAttrsSize = 0U;
void* consumerReadAttrsDesc = NULL;
sciErr = NvSciBufAttrListIpcExportUnreconciled(&consumerReadAttrs,
1,
ipcWrapper.endpoint,
&consumerReadAttrsDesc,
&consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Send size of consumer buffer attributes
sciErr = ipcSend(&ipcWrapper,
&consumerReadAttrsSize,
sizeof(consumerReadAttrsSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send consumer buffer attrs size (%x)\n",
sciErr);
goto done;
}
// Send consumer buffer attributes
sciErr = ipcSend(&ipcWrapper,
consumerReadAttrsDesc,
consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send consumer buffer attrs (%x)\n", sciErr);
goto done;
}
// Wait to receive the size of the combined buffer attributes
size_t recvBufListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvBufListSize,
sizeof(recvBufListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to receive combinedbuffer attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the combined buffer attributes
recvBufListDesc = malloc(recvBufListSize);
if (NULL == recvBufListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Buffer attr allocation failed(%x)\n", sciErr);
goto done;
}
// Receive the combined buffer attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvBufListDesc,
recvBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv combined buffer attr desc (%x)\n",
sciErr);
goto done;
}
// Convert the combined buffer attributes to an attribute list
sciErr = NvSciBufAttrListIpcImportReconciled(bufModule,
ipcWrapper.endpoint,
recvBufListDesc,
recvBufListSize,
NULL,
0,
&combinedBufAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import combined buffer attr (%x)\n",
sciErr);
goto done;
}
// Extract attributes needed by CUDA
if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
goto done;
}
// Receive all buffers
for (uint32_t i=0U; i<totalBuffers; ++i) {
Buffer* buf = &buffers[i];
// Receive the next buffer description
NvSciBufObjIpcExportDescriptor objDesc;
sciErr = ipcRecvFill(&ipcWrapper,
&objDesc,
sizeof(NvSciBufObjIpcExportDescriptor));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv buffer %d from producer (%x)\n",
i, sciErr);
goto done;
}
// Convert buffer description to a buffer object
sciErr = NvSciBufObjIpcImport(ipcWrapper.endpoint,
&objDesc,
combinedBufAttrs,
NvSciBufAccessPerm_Readonly,
1000U,
&buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import buffer %d from producer (%x)\n",
i, sciErr);
goto done;
}
// Import the buffer into CUDA
if (!setupCudaBuffer(&cudaInfo, buf)) {
goto done;
}
// Validate handles before entering runtime phase
{
NvSciBufAttrList reconciledList;
sciErr = NvSciBufObjGetAttrList(buf->obj, &reconciledList);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't get the reconciled list from NvSciBufObj %d (%x)\n",
i, sciErr);
goto done;
}
sciErr = NvSciBufAttrListValidateReconciledAgainstAttrs(reconciledList, bufKeyValue, 4);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Validation of combinedBufAttrs list failed: %x\n", sciErr);
goto done;
}
sciErr = NvSciBufObjValidate(buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Validation of imported buffer %d failed (%x)\n",
i, sciErr);
goto done;
}
}
}
fprintf(stderr, "Consumer buffers received and established\n");
// Receive all frames
uint32_t currFrame = 0;
uint32_t currBuffer = 0;
Packet packet;
while (currFrame < totalFrames) {
fprintf(stderr, "Consumer starting frame %d in buffer %d\n",
currFrame, currBuffer);
Buffer* buf = &buffers[currBuffer];
// Wait for buffer to be available
while (buf->owner != 1U) {
// Wait for next presented buffer
sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to recv buffer from producer (%x)\n",
sciErr);
goto done;
}
// Import transmitted fence description to a fence
sciErr = NvSciSyncIpcImportFence(consumerWaitObj,
&packet.fenceDesc,
&buffers[packet.bufferId].fence);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to import fence from producer (%x)\n",
sciErr);
goto done;
}
// copy CRC data from packet
buffers[packet.bufferId].crc = packet.crc;
// Mark consumer as owner of this buffer
buffers[packet.bufferId].owner = 1U;
}
// Wait for fence generated by producer before reading
if (!waitCudaFence(&cudaInfo, buf)) {
goto done;
}
// Read the buffer to the local copy
cudaErr = cudaMemcpy2DAsync(cudaInfo.bufCopy,
cudaInfo.bufSize,
buf->ptr,
cudaInfo.bufSize,
cudaInfo.bufSize,
1,
cudaMemcpyDeviceToHost,
cudaInfo.stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr);
goto done;
}
// Wait for operation to finish, then compute and compare checksum
// IMPORTANT NOTE:
// A normal stream application would not perform these steps.
// A checksum is not required for streaming, and waiting for
// operations to finish (which we only need because the
// checksum is calculated by the CPU) introduces bubbles
// in the hardware pipeline. A real application can rely on
// the generated NvSciSync fences for synchronization.
// These steps are only taken in this sample application
// because the consumer has no output visible to the user,
// so the checksum allows us to verify that the application
// is behaving properly. (A CPU-side fence-wait sketch follows
// this function.)
cudaDeviceSynchronize();
uint32_t crc = GenerateCRC(cudaInfo.bufCopy,
1,
cudaInfo.bufSize,
cudaInfo.bufSize);
if (buf->crc != crc) {
fprintf(stderr, "Checksums don't match (%x vs %x)\n",
crc, buf->crc);
goto done;
}
fprintf(stderr, "Consumer read frame %d in buffer %d\n",
currFrame, currBuffer);
// Generate new fence indicating when reading has finished
if (!signalCudaFence(&cudaInfo, buf)) {
goto done;
}
// Mark buffer as owned by producer now
buf->owner = 0U;
// Export buffer index and fence for transmission over IPC
// There is no checksum for the return trip.
packet.bufferId = currBuffer;
packet.crc = 0U;
sciErr = NvSciSyncIpcExportFence(&buf->fence,
ipcWrapper.endpoint,
&packet.fenceDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to export consumer fence (%x)\n", sciErr);
goto done;
}
// Send buffer index and fence to producer
sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to send buffer to producer (%x)\n",
sciErr);
goto done;
}
fprintf(stderr, "Consumer finished frame %d in buffer %d\n",
currFrame, currBuffer);
// Advance buffer and frame
currBuffer = (currBuffer + 1U) % totalBuffers;
currFrame++;
}
// Success
*(int*)arg = 0;
done:
// Free CUDA resources
deinitCudaBuffer(buffers, totalBuffers);
deinitCuda(&cudaInfo);
// Free NvSci objects
if (NULL != consumerSignalAttrs)
NvSciSyncAttrListFree(consumerSignalAttrs);
if (NULL != consumerWaitAttrs)
NvSciSyncAttrListFree(consumerWaitAttrs);
if (NULL != sendWaitListDesc)
NvSciSyncAttrListFreeDesc(sendWaitListDesc);
if (NULL != producerWaitAttrs)
NvSciSyncAttrListFree(producerWaitAttrs);
if (NULL != consToProdAttrs)
NvSciSyncAttrListFree(consToProdAttrs);
if (NULL != syncConflictAttrs)
NvSciSyncAttrListFree(syncConflictAttrs);
if (NULL != consumerSignalObj)
NvSciSyncObjFree(consumerSignalObj);
if (NULL != sendObjAndListDesc)
NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc);
if (NULL != consumerWaitObj)
NvSciSyncObjFree(consumerWaitObj);
if (NULL != consumerReadAttrs)
NvSciBufAttrListFree(consumerReadAttrs);
if (NULL != consumerReadAttrsDesc)
NvSciBufAttrListFreeDesc(consumerReadAttrsDesc);
if (NULL != combinedBufAttrs)
NvSciBufAttrListFree(combinedBufAttrs);
// Free malloc'd resources
if (NULL != recvWaitListDesc)
free(recvWaitListDesc);
if (NULL != recvObjAndListDesc)
free(recvObjAndListDesc);
if (NULL != recvBufListDesc)
free(recvBufListDesc);
fprintf(stderr, "Consumer exiting\n");
return NULL;
}
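The IMPORTANT NOTE in the frame loop above points out that a real application would rely on the generated NvSciSync fences alone. As a rough sketch of what a fence wait can look like without routing it through CUDA, a consumer could block on the producer's fence from the CPU. The helper below is illustrative only, not part of the sample, and assumes the consumer's sync wait attributes set NvSciSyncAttrKey_NeedCpuAccess to true, which CPU waits require.
// Illustrative sketch (not part of the sample): CPU-side wait on the
// producer's fence. Assumes the consumer requested CPU access in its
// sync attributes (NvSciSyncAttrKey_NeedCpuAccess = true).
static NvSciError waitFenceOnCpu(NvSciSyncModule module, Buffer* buf)
{
NvSciSyncCpuWaitContext waitCtx = NULL;
NvSciError err = NvSciSyncCpuWaitContextAlloc(module, &waitCtx);
if (NvSciError_Success != err) {
return err;
}
// Block until the fence signals or the timeout (microseconds) expires
err = NvSciSyncFenceWait(&buf->fence, waitCtx, 1000000);
NvSciSyncFenceClear(&buf->fence);
NvSciSyncCpuWaitContextFree(waitCtx);
return err;
}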

234
rawstream/rawstream_cuda.c Normal file
View File

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
// Initialize CUDA info
bool setupCuda(CudaClientInfo* info)
{
int cudaErr;
info->deviceId = 0;
info->stream = NULL;
info->signalerSem = NULL;
info->waiterSem = NULL;
info->bufCopy = NULL;
int numOfGPUs = 0;
cudaErr = cudaGetDeviceCount(&numOfGPUs);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Failed to get compute-capable devices (%d)\n", cudaErr);
return false;
}
if (numOfGPUs <= 0) {
fprintf(stderr, "No compute-capable devices found\n");
return false;
}
cudaErr = cudaSetDevice(info->deviceId);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Failed to set CUDA device (%d)\n", cudaErr);
return false;
}
cudaErr = cuDeviceGetUuid(&info->uuid, info->deviceId);
if (CUDA_SUCCESS != cudaErr) {
fprintf(stderr, "Failed to query CUDA UUID (%d)\n", cudaErr);
return false;
}
return true;
}
// Create CUDA sync objects and map to imported NvSciSync
bool setupCudaSync(CudaClientInfo* info,
NvSciSyncObj sciSignalObj,
NvSciSyncObj sciWaitObj)
{
cudaExternalSemaphoreHandleDesc extSemDesc;
int cudaErr;
// Create CUDA stream for signaling and waiting
cudaErr = cudaStreamCreateWithFlags(&info->stream,
cudaStreamNonBlocking);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to create CUDA stream (%d)\n",
cudaErr);
return false;
}
// Import signaler sync object to CUDA semaphore
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = sciSignalObj;
cudaErr = cudaImportExternalSemaphore(&info->signalerSem, &extSemDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import signal sync object to CUDA (%d)\n",
cudaErr);
return false;
}
// Import waiter sync object to CUDA semaphore
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = sciWaitObj;
cudaErr = cudaImportExternalSemaphore(&info->waiterSem, &extSemDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import wait sync object to CUDA (%d)\n",
cudaErr);
return false;
}
return true;
}
// Extract info from buffer attributes needed by CUDA
bool setupCudaBufAttr(CudaClientInfo* info,
NvSciBufAttrList attrs)
{
NvSciBufAttrKeyValuePair queryKeyValue[] = {
{ NvSciBufGeneralAttrKey_Types, NULL, 0 },
{ NvSciBufRawBufferAttrKey_Size, NULL, 0 },
};
NvSciError sciErr = NvSciBufAttrListGetAttrs(attrs, queryKeyValue, 2);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to query buffer type/size (%x)\n", sciErr);
return false;
}
// TODO: Original sample queries BufType but doesn't seem to do anything
// with it. Might not be needed.
info->bufType = *((NvSciBufType*)(queryKeyValue[0].value));
info->bufSize = *((uint64_t*)(queryKeyValue[1].value));
// Allocate storage for a copy of the buffer contents
info->bufCopy = (uint8_t*)malloc(info->bufSize);
if (NULL == info->bufCopy) {
fprintf(stderr, "Unable to allocate buffer copy\n");
return false;
}
(void)memset(info->bufCopy, 0, info->bufSize);
return true;
}
// Import NvSciBuf into CUDA
bool setupCudaBuffer(CudaClientInfo* info,
Buffer* buf)
{
int cudaErr;
// Import buffer to cuda as external memory
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = buf->obj;
memHandleDesc.size = info->bufSize;
cudaErr = cudaImportExternalMemory(&buf->extMem, &memHandleDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import buffer to CUDA (%d)\n",
cudaErr);
return false;
}
// Map to cuda memory buffer
cudaExternalMemoryBufferDesc bufferDesc;
memset(&bufferDesc, 0, sizeof(bufferDesc));
bufferDesc.size = info->bufSize;
bufferDesc.offset = 0;
cudaErr = cudaExternalMemoryGetMappedBuffer((void **)&buf->ptr,
buf->extMem,
&bufferDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to map CUDA buffer (%d)\n", cudaErr);
return false;
}
return true;
}
// Tell CUDA to wait for the fence associated with a buffer
bool waitCudaFence(CudaClientInfo* info,
Buffer* buf)
{
cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
waitParams.params.nvSciSync.fence = &buf->fence;
waitParams.flags = 0;
int cudaErr = cudaWaitExternalSemaphoresAsync(&info->waiterSem,
&waitParams,
1,
info->stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to wait for fence (%d)\n", cudaErr);
return false;
}
NvSciSyncFenceClear(&buf->fence);
return true;
}
// Tell CUDA to generate a fence for a buffer
bool signalCudaFence(CudaClientInfo* info,
Buffer* buf)
{
cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
signalParams.params.nvSciSync.fence = &buf->fence;
signalParams.flags = 0;
int cudaErr = cudaSignalExternalSemaphoresAsync(&info->signalerSem,
&signalParams,
1,
info->stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to signal fence (%d)\n", cudaErr);
return false;
}
return true;
}
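// Taken together, waitCudaFence() and signalCudaFence() implement the
// per-buffer handoff both endpoints use in their frame loops:
//   waitCudaFence(info, buf);    // wait on the peer's fence in the stream
//   ... issue CUDA work on info->stream ...
//   signalCudaFence(info, buf);  // publish a new fence for the peer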
void deinitCuda(CudaClientInfo* info)
{
if (NULL != info->bufCopy) {
free(info->bufCopy);
info->bufCopy = NULL;
}
if (NULL != info->signalerSem) {
(void)cudaDestroyExternalSemaphore(info->signalerSem);
info->signalerSem = NULL;
}
if (NULL != info->waiterSem) {
(void)cudaDestroyExternalSemaphore(info->waiterSem);
info->waiterSem = NULL;
}
if (NULL != info->stream) {
(void)cudaStreamDestroy(info->stream);
info->stream = NULL;
}
}
void deinitCudaBuffer(Buffer* buf, int num)
{
int i;
for (i = 0; i < num; ++i) {
if (NULL != buf[i].ptr)
cudaFree(buf[i].ptr);
if (NULL != buf[i].extMem)
(void)cudaDestroyExternalMemory(buf[i].extMem);
if (NULL != buf[i].obj)
NvSciBufObjFree(buf[i].obj);
}
}

172
rawstream/rawstream_ipc_linux.c Normal file
View File

@@ -0,0 +1,172 @@
/*
* Copyright (c) 2020-2022 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
// Initialize one end of named communication channel
NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper)
{
NvSciError err = NvSciError_Success;
// Open named endpoint
err = NvSciIpcOpenEndpoint(endpointName, &ipcWrapper->endpoint);
if (err != NvSciError_Success) {
fprintf(stderr, "Unable to open endpoint %s (%x)\n",
endpointName, err);
goto fail;
}
// initialize IPC event notifier
err = NvSciIpcGetLinuxEventFd(ipcWrapper->endpoint, &ipcWrapper->ipcEventFd);
if (err != NvSciError_Success) {
fprintf(stderr, "Unable to get Linux event fd (%x)\n", err);
goto fail;
}
// Retrieve endpoint info
err = NvSciIpcGetEndpointInfo(ipcWrapper->endpoint, &ipcWrapper->info);
if (NvSciError_Success != err) {
fprintf(stderr, "Unable to retrieve IPC endpoint info (%x)", err);
goto fail;
}
err = NvSciIpcResetEndpointSafe(ipcWrapper->endpoint);
if (NvSciError_Success != err) {
fprintf(stderr, "Unable to reset IPC endpoint (%x)", err);
}
fail:
return err;
}
// Clean up IPC when done
void ipcDeinit(IpcWrapper* ipcWrapper)
{
NvSciError err = NvSciIpcCloseEndpointSafe(ipcWrapper->endpoint, false);
if (NvSciError_Success != err) {
fprintf(stderr, "NvSciIpcCloseEndpointSafe failed (%x)\n", err);
}
}
// Wait for an event on IPC channel
static NvSciError waitEvent(IpcWrapper* ipcWrapper, uint32_t value)
{
fd_set rfds;
uint32_t event = 0;
NvSciError err;
while (true) {
// Get pending IPC events
err = NvSciIpcGetEventSafe(ipcWrapper->endpoint, &event);
if (NvSciError_Success != err) {
fprintf(stderr, "NvSciIpcGetEventSafe failed (%x)\n", err);
return err;
}
// Return if event is the kind we're looking for
if (0U != (event & value)) {
break;
}
FD_ZERO(&rfds);
FD_SET(ipcWrapper->ipcEventFd, &rfds);
// Wait for signalling indicating new event
if (select(ipcWrapper->ipcEventFd + 1, &rfds, NULL, NULL, NULL) < 0) {
// select failed
return NvSciError_ResourceError;
}
if (!FD_ISSET(ipcWrapper->ipcEventFd, &rfds)) {
return NvSciError_NvSciIpcUnknown;
}
}
return NvSciError_Success;
}
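// waitEvent() blocks until the requested event arrives. A bounded
// variant is sketched below for illustration (unused by the sample);
// it assumes NvSciError_Timeout as the expiry code.
static NvSciError waitEventTimeout(IpcWrapper* ipcWrapper, uint32_t value,
long timeoutSec)
{
fd_set rfds;
uint32_t event = 0;
NvSciError err;
while (true) {
// Get pending IPC events
err = NvSciIpcGetEventSafe(ipcWrapper->endpoint, &event);
if (NvSciError_Success != err) {
return err;
}
if (0U != (event & value)) {
return NvSciError_Success;
}
FD_ZERO(&rfds);
FD_SET(ipcWrapper->ipcEventFd, &rfds);
// Bound the wait instead of blocking forever
struct timeval tv = { .tv_sec = timeoutSec, .tv_usec = 0 };
int n = select(ipcWrapper->ipcEventFd + 1, &rfds, NULL, NULL, &tv);
if (n < 0) {
return NvSciError_ResourceError;
}
if (0 == n) {
return NvSciError_Timeout;
}
}
}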
// Send a message over IPC
NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size)
{
NvSciError err = NvSciError_Success;
bool done = false;
uint32_t bytes;
// Loop until entire message sent
while (done == false) {
// Wait for room in channel to send a message
err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_WRITE);
if (NvSciError_Success != err) {
goto fail;
}
assert(size <= UINT32_MAX);
// Send as much of the message as we can
err = NvSciIpcWriteSafe(ipcWrapper->endpoint, buf, (uint32_t)size,
&bytes);
if (NvSciError_Success != err) {
fprintf(stderr, "IPC write failed (%x)\n", err);
goto fail;
}
// For this simple sample, we just fail if the entire message wasn't
// sent. A variant that retries to send the rest is sketched after
// this function.
if (size != (size_t)bytes) {
fprintf(stderr, "Failed to send entire message (%u < %zu)\n",
bytes, size);
err = NvSciError_NvSciIpcUnknown;
goto fail;
}
done = true;
}
fail:
return err;
}
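// As noted in ipcSend(), a fuller implementation could retry after a
// partial write instead of failing. A minimal sketch (unused by the
// sample):
static NvSciError ipcSendAll(IpcWrapper* ipcWrapper, const void* buf,
const size_t size)
{
const uint8_t* pos = (const uint8_t*)buf;
size_t remaining = size;
while (remaining > 0U) {
// Wait until the channel can accept more data
NvSciError err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_WRITE);
if (NvSciError_Success != err) {
return err;
}
assert(remaining <= UINT32_MAX);
uint32_t bytes = 0U;
err = NvSciIpcWriteSafe(ipcWrapper->endpoint, pos,
(uint32_t)remaining, &bytes);
if (NvSciError_Success != err) {
return err;
}
// Advance past whatever was accepted and try again
pos += bytes;
remaining -= bytes;
}
return NvSciError_Success;
}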
// Receive a message over IPC
NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size)
{
NvSciError err = NvSciError_Success;
bool done = false;
uint32_t bytes;
// Loop until entire message received
while (done == false) {
// Wait for incoming data
err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_READ);
if (NvSciError_Success != err) {
goto fail;
}
assert(size <= UINT32_MAX);
// Read as much of the message as we can
err = NvSciIpcReadSafe(ipcWrapper->endpoint, buf, (uint32_t)size,
&bytes);
if (NvSciError_Success != err) {
fprintf(stderr, "IPC read failed (%x)\n", err);
goto fail;
}
// For this simple sample, we just fail if the entire message wasn't
// read. Could instead retry to receive the rest.
if (size != (size_t)bytes) {
fprintf(stderr, "Failed to read entire message (%u < %zu)\n",
bytes, size);
err = NvSciError_NvSciIpcUnknown;
goto fail;
}
done = true;
}
fail:
return err;
}

230
rawstream/rawstream_main.c Normal file
View File

@@ -0,0 +1,230 @@
//! \file
//! \brief NvStreams rawstream main file.
//!
//! \copyright
//! SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//! SPDX-License-Identifier: LicenseRef-NvidiaProprietary
//!
//! NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
//! property and proprietary rights in and to this material, related
//! documentation and any modifications thereto. Any use, reproduction,
//! disclosure or distribution of this material and related documentation
//! without an express license agreement from NVIDIA CORPORATION or
//! its affiliates is strictly prohibited.
#include "rawstream.h"
#include <getopt.h>
NvSciSyncModule syncModule = NULL;
NvSciBufModule bufModule = NULL;
NvSciSyncAttrList producerSignalAttrs = NULL;
NvSciSyncAttrList consumerSignalAttrs = NULL;
NvSciSyncAttrList producerWaitAttrs = NULL;
NvSciSyncAttrList consumerWaitAttrs = NULL;
NvSciSyncAttrList prodToConsAttrs = NULL;
NvSciSyncAttrList consToProdAttrs = NULL;
NvSciSyncObj consumerSignalObj = NULL;
NvSciSyncObj producerSignalObj = NULL;
NvSciSyncObj consumerWaitObj = NULL;
NvSciSyncObj producerWaitObj = NULL;
NvSciBufAttrList producerWriteAttrs = NULL;
NvSciBufAttrList consumerReadAttrs = NULL;
NvSciBufAttrList combinedBufAttrs = NULL;
Buffer buffers[totalBuffers];
IpcWrapper ipcWrapper;
int late_attach = 0;
int main(int argc, char *argv[])
{
NvSciError err;
int producer;
const char* endpoint = "Unknown";
int ret = 0;
int opt;
producer = -1;
while ((opt = getopt(argc, argv, "pcl")) != -1) {
switch (opt) {
case 'p':
producer = 1;
endpoint = "Producer";
break;
case 'c':
producer = 0;
endpoint = "Consumer";
break;
case 'l':
late_attach = 1;
break;
default:
fprintf(stderr, "Unknown option: '%c'\n", opt);
}
}
if (producer == -1 || optind < argc) {
fprintf(stderr, "Usage: ./rawstream [-l] {-p or -c}\n");
fprintf(stderr,
"-p denotes producer, -c denotes consumer, -l denotes late-attach\n");
fprintf(stderr,
"either -p or -c must be provided; -l is an optional parameter\n");
return 1;
}
fprintf(stderr, "%p application starting\n", endpoint);
// Open sync module (shared by both all threads)
err = NvSciSyncModuleOpen(&syncModule);
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to open sync module (%x)\n",
endpoint, err);
return 1;
}
// Open buf module (shared by all threads)
err = NvSciBufModuleOpen(&bufModule);
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to open buf module (%x)\n",
endpoint, err);
ret = 1;
goto close_sync_module;
}
// Initialize IPC library
err = NvSciIpcInit();
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to init ipc library (%x)\n",
endpoint, err);
ret = 1;
goto close_buf_module;
}
// Establish IPC communications based on endpoint
// TODO: Settle on final IPC channel names
if (producer == 1) {
err = ipcInit("nvscisync_a_0", &ipcWrapper);
} else {
err = ipcInit("nvscisync_a_1", &ipcWrapper);
}
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to initialize communication (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
// Test communication by exchanging a simple handshake message
const int send_handshake = 12345;
err = ipcSend(&ipcWrapper, &send_handshake, sizeof(send_handshake));
if (NvSciError_Success != err) {
fprintf(stderr, "%s failed to send handshake (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
int recv_handshake = 0;
err = ipcRecvFill(&ipcWrapper, &recv_handshake, sizeof(recv_handshake));
if (NvSciError_Success != err) {
fprintf(stderr, "%s failed to receive handshake (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
if (send_handshake != recv_handshake) {
fprintf(stderr, "%s handshake did not match (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
// Initialize buffer list
for (uint32_t i=0; i<totalBuffers; ++i) {
buffers[i].owner = 0;
buffers[i].fence = NvSciSyncFenceInitializer;
}
// Launch appropriate thread
if (producer == 1) {
// Launch producer threads
pthread_t producerThread;
if (0 != pthread_create(&producerThread, NULL, producerFunc, &ret)) {
fprintf(stderr, "Failed to launch producer\n");
ret = 1;
goto deinit_IPC;
}
// Wait for thread to finish
(void)pthread_join(producerThread, NULL);
} else {
// Launch consumer threads
pthread_t consumerThread;
if (0 != pthread_create(&consumerThread, NULL, consumerFunc, &ret)) {
fprintf(stderr, "Failed to launch consumer\n");
ret = 1;
goto deinit_IPC;
}
// Wait for thread to finish
(void)pthread_join(consumerThread, NULL);
}
deinit_IPC:
ipcDeinit(&ipcWrapper);
(void)NvSciIpcDeinit();
close_buf_module:
(void)NvSciBufModuleClose(bufModule);
close_sync_module:
(void)NvSciSyncModuleClose(syncModule);
fprintf(stderr, "Sample completed\n");
return ret;
}
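// Typical invocation is one process per endpoint, for example:
//   ./rawstream -p      (producer)
//   ./rawstream -c      (consumer)
// with -l added for the late-attach variant. The NvSciIpc channel names
// above ("nvscisync_a_0"/"nvscisync_a_1") must be configured on the
// system for the two endpoints to connect (see the TODO above about
// final channel names).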
// Checksum calculation
#define CRC32_POLYNOMIAL 0xEDB88320L
uint32_t GenerateCRC(uint8_t* data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch)
{
uint32_t y = 0U, x = 0U;
uint32_t crc = 0U, tmp;
static uint32_t crcTable[256];
static int initialized = 0;
// Initialize CRC table, which is a one-time operation
if (!initialized) {
for (int i = 0; i <= 255; i++) {
tmp = i;
for (int j = 8; j > 0; j--) {
if (tmp & 1) {
tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL;
} else {
tmp >>= 1;
}
}
crcTable[i] = tmp;
}
initialized = 1;
}
// Calculate CRC for the data
for (y = 0U; y < height; y++) {
for (x = 0U; x < width; x++) {
tmp = (crc >> 8) & 0x00FFFFFFL;
crc = tmp ^ crcTable[((uint32_t) crc ^ *(data_ptr + x)) & 0xFF];
}
data_ptr += pitch;
}
return crc;
}
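// Example: the producer and consumer treat the whole raw buffer as a
// single row, so height is 1 and pitch equals width:
//   uint32_t crc = GenerateCRC(bufCopy, 1, bufSize, bufSize);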

752
rawstream/rawstream_producer.c Normal file
View File

@@ -0,0 +1,752 @@
//! \file
//! \brief NvStreams rawstream producer file.
//!
//! \copyright
//! SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//! SPDX-License-Identifier: LicenseRef-NvidiaProprietary
//!
//! NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
//! property and proprietary rights in and to this material, related
//! documentation and any modifications thereto. Any use, reproduction,
//! disclosure or distribution of this material and related documentation
//! without an express license agreement from NVIDIA CORPORATION or
//! its affiliates is strictly prohibited.
#include "rawstream.h"
extern int late_attach;
void* producerFunc(void* arg)
{
CudaClientInfo cudaInfo;
NvSciError sciErr;
int cudaErr;
void* recvWaitListDesc = NULL;
void* recvObjAndListDesc = NULL;
void* consumerReadAttrsDesc = NULL;
// Declared up front so the cleanup path never reads them uninitialized
void* sendWaitListDesc = NULL;
void* sendObjAndListDesc = NULL;
void* sendBufListDesc = NULL;
NvSciSyncAttrList syncConflictAttrs = NULL;
*(int*)arg = 1;
fprintf(stderr, "Producer starting\n");
// Do common cuda initialization
if (!setupCuda(&cudaInfo)) {
goto done;
}
// Create an empty sync attribute list for signaling permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &producerSignalAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer signal attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to signal syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(producerSignalAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrSignal);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query signal attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Producer signal attributes established\n");
// Create an empty sync attribute list for waiting permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &producerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to wait for syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(producerWaitAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrWait);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query wait attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Producer wait attributes established\n");
// Export producer's wait attributes to a form suitable for IPC
size_t sendWaitAttrListSize = 0U;
sciErr = NvSciSyncAttrListIpcExportUnreconciled(&producerWaitAttrs,
1,
ipcWrapper.endpoint,
&sendWaitListDesc,
&sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Send the size of the producer's wait attributes to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendWaitAttrListSize,
sizeof(sendWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send producer wait attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the producer's wait attributes
sciErr = ipcSend(&ipcWrapper,
sendWaitListDesc,
sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the consumer's wait attributes
size_t recvWaitAttrListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvWaitAttrListSize,
sizeof(recvWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer wait attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the consumer's wait attributes
recvWaitListDesc = malloc(recvWaitAttrListSize);
if (NULL == recvWaitListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync attr allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive consumer's wait attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvWaitListDesc,
recvWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Convert the received consumer wait attributes to an attribute list
sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule,
ipcWrapper.endpoint,
recvWaitListDesc,
recvWaitAttrListSize,
&consumerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for producer to consumer signaling
NvSciSyncAttrList syncAllAttrs[2];
syncAllAttrs[0] = producerSignalAttrs;
syncAllAttrs[1] = consumerWaitAttrs;
sciErr = NvSciSyncAttrListReconcile(syncAllAttrs,
2,
&prodToConsAttrs,
&syncConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't merge producer->consumer attrs (%x)\n",
sciErr);
goto done;
}
// Allocate producer to consumer sync object
sciErr = NvSciSyncObjAlloc(prodToConsAttrs, &producerSignalObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't allocate producer->consumer sync (%x)\n",
sciErr);
goto done;
}
// Export sync attributes and object to a form suitable for IPC
size_t sendObjAndListSize = 0U;
sciErr = NvSciSyncIpcExportAttrListAndObj(producerSignalObj,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&sendObjAndListDesc,
&sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Send the size of the sync description to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send producer->consumer sync description size(%x)\n",
sciErr);
goto done;
}
// Send the sync description to the consumer
sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the consumer->producer sync description
size_t recvObjAndListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvObjAndListSize,
sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't recv consumer->produce sync description size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the description
recvObjAndListDesc = malloc(recvObjAndListSize);
if (NULL == recvObjAndListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync description allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive consumer->producer sync description
sciErr = ipcRecvFill(&ipcWrapper,
recvObjAndListDesc,
recvObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't receive consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Convert the received consumer->producer sync description to a
// sync attribute list and object
sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
ipcWrapper.endpoint,
recvObjAndListDesc,
recvObjAndListSize,
&producerWaitAttrs,
1,
NvSciSyncAccessPerm_WaitOnly,
1000000, // timeoutUs
&producerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't import consumer->producer sync (%x)\n", sciErr);
goto done;
}
// Set up CUDA sync objects, importing NvSciSync objects
if (!setupCudaSync(&cudaInfo, producerSignalObj, producerWaitObj)) {
goto done;
}
fprintf(stderr, "Producer exchanged sync objects with consumer\n");
// Create an empty buffer attribute list for producer buffers
sciErr = NvSciBufAttrListCreate(bufModule, &producerWriteAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Fill producer buffer attribute list with values
NvSciBufAttrKeyValuePair bufKeyValue[7];
NvSciRmGpuId gpuId;
memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
bufKeyValue[0].value = &gpuId;
bufKeyValue[0].len = sizeof(gpuId);
NvSciBufType bufType = NvSciBufType_RawBuffer;
bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
bufKeyValue[1].value = &bufType;
bufKeyValue[1].len = sizeof(bufType);
NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_ReadWrite;
bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
bufKeyValue[2].value = &bufPerm;
bufKeyValue[2].len = sizeof(bufPerm);
bool bufAccessFlag = true;
bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
bufKeyValue[3].value = &bufAccessFlag;
bufKeyValue[3].len = sizeof(bufAccessFlag);
uint64_t rawsize = (128 * 1024);
bufKeyValue[4].key = NvSciBufRawBufferAttrKey_Size;
bufKeyValue[4].value = &rawsize;
bufKeyValue[4].len = sizeof(rawsize);
uint64_t align = (4 * 1024);
bufKeyValue[5].key = NvSciBufRawBufferAttrKey_Align;
bufKeyValue[5].value = &align;
bufKeyValue[5].len = sizeof(align);
if (late_attach) {
// Add late peer location attribute
NvSciBufPeerLocationInfo peerLocation;
peerLocation.socID = NV_SCI_BUF_PEER_INFO_SELF_SOCID;
peerLocation.vmID = NV_SCI_BUF_PEER_INFO_SELF_VMID;
peerLocation.reserved = 0;
bufKeyValue[6].key = NvSciBufGeneralAttrKey_PeerLocationInfo;
bufKeyValue[6].value = &peerLocation;
bufKeyValue[6].len = sizeof(peerLocation);
sciErr = NvSciBufAttrListSetAttrs(producerWriteAttrs, bufKeyValue, 7);
} else {
sciErr = NvSciBufAttrListSetAttrs(producerWriteAttrs, bufKeyValue, 6);
}
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to fill producer buffer attrs (%x)\n", sciErr);
goto done;
}
fprintf(stderr, "Producer buffer attributes established\n");
if (late_attach) {
// The consumer's attributes aren't available yet, so reconcile only
// the producer's own attributes.
NvSciBufAttrList bufAllAttrs[2], bufConflictAttrs;
bufAllAttrs[0] = producerWriteAttrs;
sciErr = NvSciBufAttrListReconcile(bufAllAttrs, 1,
&combinedBufAttrs, &bufConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't merge buffer attrs (%x)\n", sciErr);
goto done;
}
// Allocate all buffers
for (uint32_t i=0U; i<totalBuffers; ++i) {
Buffer* buf = &buffers[i];
// Allocate the buffer
sciErr = NvSciBufObjAlloc(combinedBufAttrs, &buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't allocate buffer %d (%x)\n", i, sciErr);
goto done;
}
}
}
// Wait to receive the size of the consumer's buffer attributes
size_t consumerReadAttrsSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&consumerReadAttrsSize,
sizeof(consumerReadAttrsSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer buffer attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the consumer's buffer attributes
consumerReadAttrsDesc = malloc(consumerReadAttrsSize);
if (NULL == consumerReadAttrsDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Buffer attr allocation failed (%x)\n", sciErr);
goto done;
}
// Wait to receive the consumer's buffer attributes
sciErr = ipcRecvFill(&ipcWrapper,
consumerReadAttrsDesc,
consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to recv consumer buffer attrs (%x)\n", sciErr);
goto done;
}
// Convert the received consumer buffer attributes to an attribute list
sciErr = NvSciBufAttrListIpcImportUnreconciled(bufModule,
ipcWrapper.endpoint,
consumerReadAttrsDesc,
consumerReadAttrsSize,
&consumerReadAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for buffers
NvSciBufAttrList bufAllAttrs[2], bufConflictAttrs;
bufAllAttrs[0] = producerWriteAttrs;
bufAllAttrs[1] = consumerReadAttrs;
sciErr = NvSciBufAttrListReconcile(bufAllAttrs, 2,
&combinedBufAttrs, &bufConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't merge buffer attrs (%x)\n", sciErr);
goto done;
}
// Export combined buffer attributes to a form suitable for IPC
sendBufListDesc = NULL;
size_t sendBufListSize = 0U;
sciErr = NvSciBufAttrListIpcExportReconciled(combinedBufAttrs,
ipcWrapper.endpoint,
&sendBufListDesc,
&sendBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export reconciled buffer attrs to consumer (%x)\n",
sciErr);
goto done;
}
// Send the size of the combined buffer attributes to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendBufListSize,
sizeof(sendBufListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send combined buffer attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the combined buffer attributes
sciErr = ipcSend(&ipcWrapper,
sendBufListDesc,
sendBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send combined buffer attrs (%x)\n", sciErr);
goto done;
}
// Extract attributes needed by CUDA
if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
goto done;
}
// Export all buffers
for (uint32_t i=0U; i<totalBuffers; ++i) {
Buffer* buf = &buffers[i];
if (0 == late_attach) {
// Allocate the buffer
sciErr = NvSciBufObjAlloc(combinedBufAttrs, &buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't allocate buffer %d (%x)\n", i, sciErr);
goto done;
}
} else {
NvSciBufAttrList bufAllAttrs[1];
// Use the imported consumer attribute list, and invoke
// NvSciBufObjAttachPeer() before exporting the NvSciBufObj.
bufAllAttrs[0] = consumerReadAttrs;
sciErr = NvSciBufObjAttachPeer(buf->obj, bufAllAttrs, 1);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "NvSciBufObjAttachPeer call failed error: %x\n", sciErr);
goto done;
} else {
fprintf(stderr, "NvSciBufObjAttachPeer call succeeded\n");
}
}
// Export buffer object to a form suitable for IPC
// Note: Unlike attribute lists, the exported form of objects has
// a fixed size.
NvSciBufObjIpcExportDescriptor objDesc;
sciErr = NvSciBufObjIpcExport(buf->obj,
NvSciBufAccessPerm_ReadWrite,
ipcWrapper.endpoint,
&objDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export buffer %d object to consumer (%x)\n",
i, sciErr);
goto done;
}
// Send the buffer description to the consumer
sciErr = ipcSend(&ipcWrapper,
&objDesc,
sizeof(NvSciBufObjIpcExportDescriptor));
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send buffer %d (%x)\n", i, sciErr);
goto done;
}
// Import the buffer into CUDA
if (!setupCudaBuffer(&cudaInfo, buf)) {
goto done;
}
{
NvSciBufAttrList reconciledList;
sciErr = NvSciBufObjGetAttrList(buf->obj, &reconciledList);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't get the reconciled list from NvSciBufObj %d (%x)\n", i, sciErr);
goto done;
}
sciErr = NvSciBufAttrListValidateReconciledAgainstAttrs(
reconciledList, bufKeyValue, 6);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Validation of combinedBufAttrs failed (%x)\n", sciErr);
goto done;
}
sciErr = NvSciBufObjValidate(buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Validation of buffer %d failed (%x)\n", i, sciErr);
goto done;
}
}
}
fprintf(stderr, "Producer buffers established and transmitted\n");
// Validate handles before starting the runtime phase
{
NvSciSyncAttrList producerWaitList;
sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
prodToConsAttrs,
NULL,
0,
NvSciSyncAccessPerm_SignalOnly);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of prodToCons list failed: %x\n", sciErr);
goto done;
}
sciErr = NvSciSyncObjGetAttrList(producerWaitObj,
&producerWaitList);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't get the reconciled list from producer wait object (%x)\n",
sciErr);
goto done;
}
sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
producerWaitList,
NULL,
0,
NvSciSyncAccessPerm_WaitOnly);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of imported reconciled producer wait list failed: %x\n",
sciErr);
goto done;
}
sciErr = NvSciSyncObjValidate(producerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(
stderr,
"Validation of imported producer wait object failed: %x\n",
sciErr);
goto done;
}
}
// Send all frames
uint32_t currFrame = 0;
uint32_t currBuffer = 0;
Packet packet;
while (currFrame < totalFrames) {
fprintf(stderr, "Producer starting frame %d in buffer %d\n",
currFrame, currBuffer);
Buffer* buf = &buffers[currBuffer];
// Wait for buffer to be available
// Note: On first frame for each buffer, the producer already owns
// it, so this is skipped. On subsequent frames it must wait
// for the buffer's return.
while (buf->owner != 0U) {
// Wait for next returned buffer
sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to recv buffer from consumer (%x)\n",
sciErr);
goto done;
}
// Import transmitted fence description to a fence
sciErr = NvSciSyncIpcImportFence(producerWaitObj,
&packet.fenceDesc,
&buffers[packet.bufferId].fence);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to import fence from consumer (%x)\n",
sciErr);
goto done;
}
// Extract checksum from packet
buffers[packet.bufferId].crc = packet.crc;
// Mark producer as owner of this buffer
buffers[packet.bufferId].owner = 0U;
}
// Wait for fence returned by consumer before rendering
if (!waitCudaFence(&cudaInfo, buf)) {
goto done;
}
// CUDA rendering to buffer
(void)memset(cudaInfo.bufCopy, (currFrame & 0xFF), cudaInfo.bufSize);
cudaErr = cudaMemcpy2DAsync(buf->ptr,
cudaInfo.bufSize,
cudaInfo.bufCopy,
cudaInfo.bufSize,
cudaInfo.bufSize,
1,
cudaMemcpyHostToDevice,
cudaInfo.stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr);
goto done;
}
// Generate new fence for the sync object
if (!signalCudaFence(&cudaInfo, buf)) {
goto done;
}
// Wait for operation to finish and compute checksum
// IMPORTANT NOTE:
// A normal stream application would not perform these steps.
// A checksum is not required for streaming, and waiting for
// operations to finish (which we only need because the
// checksum is calculated by the CPU) introduces bubbles
// in the hardware pipeline. A real application can rely on
// the generated NvSciSync fences for synchronization.
// These steps are only taken in this sample application
// because the consumer has no output visible to the user,
// so the checksum allows us to verify that the application
// is behaving properly.
cudaDeviceSynchronize();
buf->crc = GenerateCRC(cudaInfo.bufCopy,
1,
cudaInfo.bufSize,
cudaInfo.bufSize);
fprintf(stderr, "Producer wrote frame %d in buffer %d\n",
currFrame, currBuffer);
// Mark buffer as owned by consumer now
buf->owner = 1U;
// Export buffer index, checksum, and fence for transmission over IPC
packet.bufferId = currBuffer;
packet.crc = buf->crc;
sciErr = NvSciSyncIpcExportFence(&buf->fence,
ipcWrapper.endpoint,
&packet.fenceDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to export producer fence (%x)\n", sciErr);
goto done;
}
// Send buffer index and fence to consumer
sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to send buffer to consumer (%x)\n",
sciErr);
goto done;
}
fprintf(stderr, "Producer finished frame %d in buffer %d\n",
currFrame, currBuffer);
// Advance buffer and frame
currBuffer = (currBuffer + 1U) % totalBuffers;
currFrame++;
}
// Success
*(int*)arg = 0;
done:
// Free CUDA resources
deinitCudaBuffer(buffers, totalBuffers);
deinitCuda(&cudaInfo);
// Free NvSci objects
if (NULL != producerSignalAttrs)
NvSciSyncAttrListFree(producerSignalAttrs);
if (NULL != consumerWaitAttrs)
NvSciSyncAttrListFree(consumerWaitAttrs);
if (NULL != sendWaitListDesc)
NvSciSyncAttrListFreeDesc(sendWaitListDesc);
if (NULL != producerWaitAttrs)
NvSciSyncAttrListFree(producerWaitAttrs);
if (NULL != prodToConsAttrs)
NvSciSyncAttrListFree(prodToConsAttrs);
if (NULL != syncConflictAttrs)
NvSciSyncAttrListFree(syncConflictAttrs);
if (NULL != producerSignalObj)
NvSciSyncObjFree(producerSignalObj);
if (NULL != sendObjAndListDesc)
NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc);
if (NULL != producerWaitObj)
NvSciSyncObjFree(producerWaitObj);
if (NULL != producerWriteAttrs)
NvSciBufAttrListFree(producerWriteAttrs);
if (NULL != consumerReadAttrs)
NvSciBufAttrListFree(consumerReadAttrs);
if (NULL != combinedBufAttrs)
NvSciBufAttrListFree(combinedBufAttrs);
if (NULL != sendBufListDesc)
NvSciBufAttrListFreeDesc(sendBufListDesc);
// Free malloc'd resources
if (NULL != recvWaitListDesc)
free(recvWaitListDesc);
if (NULL != recvObjAndListDesc)
free(recvObjAndListDesc);
if (NULL != consumerReadAttrsDesc)
free(consumerReadAttrsDesc);
fprintf(stderr, "Producer exiting\n");
return NULL;
}