Updating prebuilts and/or headers

8193be73ce0a488f62034cb87083cdf09f52cd5d - event_sample_app/block_pool.c
a0bd135d707994a41ed3a4234b5f875a268fed4d - event_sample_app/Makefile
44f6de348f8bdd5cb584b3e8cc4b05e9482dddd2 - event_sample_app/event_loop.h
6ff0f1c2d7ef2e2fa9ece6fdc850b58b87207526 - event_sample_app/block_returnsync.c
1158201e78094e9e866fa99095c9ffc2ec9f5a27 - event_sample_app/block_limiter.c
f5e2aea98ba9264ee1068a700222dff8d5d5c7a4 - event_sample_app/block_c2c.c
ef057870dade9af70656b37340e9bcad35d49380 - event_sample_app/block_multicast.c
641e3634da873970b574b23a1024b2e7155b88ff - event_sample_app/block_consumer_uc1.c
ced622a41d1a48dcb23e6a1a02ae9640ef9b837c - event_sample_app/util.h
3a1013021a572887303fb6db245b5b01fe07e9a0 - event_sample_app/block_producer_uc1.c
dac99c442185b020fbdae07bfc1e7df78343eb83 - event_sample_app/block_info.h
b5dd68bec3ae6f9049aad1cb5a86c3db4af02e17 - event_sample_app/block_presentsync.c
e0861e9fe5d160d47d758464146d7192f9c70a5f - event_sample_app/util.c
d7e42e2b6088ff4596abc7256eb018d757a4021e - event_sample_app/usecase1.h
65ffe5af6ae6bc0418f348167c473849d4697e47 - event_sample_app/block_ipc.c
b52e34443ac441a9df48029de944aa0a50d1b101 - event_sample_app/event_loop_service.c
5001f036389a4f7952cb4974dd3323908208ca30 - event_sample_app/event_loop_threads.c
a71ed037f9d77d0944f40f54cf25db8180d007e2 - event_sample_app/block_queue.c
d6bbd17599543f1760d87851150a12a2a842a24d - event_sample_app/block_common.c
40f949c4c37ab4aa4a84182b345f3de6fceab39b - event_sample_app/main.c
458833ab233a725c067bf9b1fc60ef39872eee80 - rawstream/Makefile
1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c
e26c09f1ad1a3a7d2c29dae1b38d3fd90c23af6e - rawstream/rawstream_consumer.c
2bed038ca070aa5dccd6b672a98f093340e829bb - rawstream/rawstream_producer.c
3df4e5c00a3dc002ee9877e282bd28ffa87fa6f0 - rawstream/rawstream.h
d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c
f28c1cd5fe26b6dc5930d5556b54364c9b91767c - rawstream/rawstream_main.c

Change-Id: Icdf4312706c30fbcfa1533fba5277879e8d77aec
Contained in: svcmobrel-release
Date: 2025-01-21 05:25:22 -08:00
Parent: e927f757fd
Commit: 5482324389
31 changed files with 7781 additions and 0 deletions

commitFile.txt (new file, 29 lines)

@@ -0,0 +1,29 @@
Updating prebuilts and/or headers
8193be73ce0a488f62034cb87083cdf09f52cd5d - event_sample_app/block_pool.c
a0bd135d707994a41ed3a4234b5f875a268fed4d - event_sample_app/Makefile
44f6de348f8bdd5cb584b3e8cc4b05e9482dddd2 - event_sample_app/event_loop.h
6ff0f1c2d7ef2e2fa9ece6fdc850b58b87207526 - event_sample_app/block_returnsync.c
1158201e78094e9e866fa99095c9ffc2ec9f5a27 - event_sample_app/block_limiter.c
f5e2aea98ba9264ee1068a700222dff8d5d5c7a4 - event_sample_app/block_c2c.c
ef057870dade9af70656b37340e9bcad35d49380 - event_sample_app/block_multicast.c
641e3634da873970b574b23a1024b2e7155b88ff - event_sample_app/block_consumer_uc1.c
ced622a41d1a48dcb23e6a1a02ae9640ef9b837c - event_sample_app/util.h
3a1013021a572887303fb6db245b5b01fe07e9a0 - event_sample_app/block_producer_uc1.c
dac99c442185b020fbdae07bfc1e7df78343eb83 - event_sample_app/block_info.h
b5dd68bec3ae6f9049aad1cb5a86c3db4af02e17 - event_sample_app/block_presentsync.c
e0861e9fe5d160d47d758464146d7192f9c70a5f - event_sample_app/util.c
d7e42e2b6088ff4596abc7256eb018d757a4021e - event_sample_app/usecase1.h
65ffe5af6ae6bc0418f348167c473849d4697e47 - event_sample_app/block_ipc.c
b52e34443ac441a9df48029de944aa0a50d1b101 - event_sample_app/event_loop_service.c
5001f036389a4f7952cb4974dd3323908208ca30 - event_sample_app/event_loop_threads.c
a71ed037f9d77d0944f40f54cf25db8180d007e2 - event_sample_app/block_queue.c
d6bbd17599543f1760d87851150a12a2a842a24d - event_sample_app/block_common.c
40f949c4c37ab4aa4a84182b345f3de6fceab39b - event_sample_app/main.c
458833ab233a725c067bf9b1fc60ef39872eee80 - rawstream/Makefile
1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c
e26c09f1ad1a3a7d2c29dae1b38d3fd90c23af6e - rawstream/rawstream_consumer.c
2bed038ca070aa5dccd6b672a98f093340e829bb - rawstream/rawstream_producer.c
3df4e5c00a3dc002ee9877e282bd28ffa87fa6f0 - rawstream/rawstream.h
d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c
f28c1cd5fe26b6dc5930d5556b54364c9b91767c - rawstream/rawstream_main.c


@@ -0,0 +1,131 @@
NvSciStream Event Loop Driven Sample App - README
Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
property and proprietary rights in and to this material, related
documentation and any modifications thereto. Any use, reproduction,
disclosure or distribution of this material and related documentation
without an express license agreement from NVIDIA CORPORATION or
its affiliates is strictly prohibited.
---
# nvscistream_event_sample - NvSciStream Sample App
## Description
This directory contains an NvSciStream sample application that
supports a variety of use cases, using an event-loop driven model.
Once the stream is fully connected, all further setup and streaming
operations are triggered by events, processed either by a single
NvSciEvent-driven thread or by separate threads that wait for events
on each block. The former is the preferred approach for implementing
NvSciStream applications. In addition to those events which NvSci
itself generates, any other event which can be bound to an NvSciEvent
can be added to the event loop. This allows for robust applications
which can handle events regardless of the order in which they occur.
To use this sample for writing your own applications:
* See main.c for examples of how to do top level application setup and
how to select the blocks needed for your use case and connect them
all together.
* See the descriptions in the usecase*.h files to determine which use cases
involve the producer and consumer engines that you are interested in.
* See the appropriate block_*.c files for examples of creating the
necessary blocks and handling the events that they encounter.
See the block_producer_*.c and block_consumer_*.c files for examples of how
to map the relevant engines to and from NvSci.
* See the appropriate event_loop_*.c file for your chosen event handling
method.
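To make the dispatch model concrete, here is a minimal sketch
(illustrative only, not code from this sample; the structure and names
below are simplified stand-ins, with the handler return convention
taken from event_loop.h):

#include <stdint.h>

/* Hypothetical single-threaded dispatch loop: poll every live block,
 * let its handler process one event, and drop a block once it reports
 * completion (rv > 1) or a fatal error (rv < 0). */
typedef struct {
    void* data;                                 /* block's private data */
    int32_t (*func)(void* data, uint32_t wait); /* per-block handler */
} Entry;

static void dispatchAll(Entry* list, int32_t count)
{
    while (count > 0) {
        for (int32_t i = 0; i < count; ++i) {
            int32_t rv = list[i].func(list[i].data, 1U);
            if ((rv < 0) || (rv > 1)) {
                /* Handler already destroyed the block; drop the entry */
                list[i] = list[--count];
                break;
            }
        }
    }
}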
## Build the application
The NvSciStream sample includes source code and a Makefile.
Navigate to the sample application directory to build the application:
make clean
make
## Examples of how to run the sample application:
* NOTE:
* Inter-process and inter-chip test cases must be run with sudo.
* NvMedia/CUDA stream (use case 2) of the sample application is not supported
on x86 and Jetson Linux devices.
* Inter-chip use cases are not supported on Jetson Linux devices.
Single-process, single-consumer CUDA/CUDA stream that uses the default event
service:
./nvscistream_event_sample
Single-process, single-consumer stream that uses threaded event handling:
./nvscistream_event_sample -e t
Single-process NvMedia/CUDA stream with yuv format:
./nvscistream_event_sample -u 2 -s y
Single-process NvMedia/CUDA stream with three consumers, where the
second uses mailbox mode:
./nvscistream_event_sample -u 2 -m 3 -q 1 m
Multi-process CUDA/CUDA stream with three consumers, one in the same
process as the producer, and the other two in separate processes. The
first and third consumers use mailbox mode:
./nvscistream_event_sample -m 3 -p -c 0 -q 0 m &
./nvscistream_event_sample -c 1 -c 2 -q 2 m
Multi-process CUDA/CUDA stream with three consumers, one in the same
process as the producer, and the other two in separate processes.
To simulate the case with a less trusted consumer, one of the consumer
processes is set with lower priority. A limiter block is used to restrict
this consumer to hold at most one packet. The total number of packets is
increased to five.
Linux example:
./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
./nvscistream_event_sample -c 1 &
nice -n 19 ./nvscistream_event_sample -c 2 &
# Makes the third process as nice as possible.
QNX example:
./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
./nvscistream_event_sample -c 1 &
nice -n 1 ./nvscistream_event_sample -c 2 &
# Reduces the priority level of the third process by 1.
Multi-process CUDA/CUDA stream with two consumers, one in the same
process as the producer, and the other in a separate process. Both
processes enable the endpoint information option:
./nvscistream_event_sample -m 2 -p -c 0 -i &
./nvscistream_event_sample -c 1 -i
Multi-process CUDA/CUDA stream with one consumer on another SoC.
The consumer has the FIFO queue attached to the C2C IpcSrc block, and
a three-packet pool attached to the C2C IpcDst block. It uses IPC channel
nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for C2C communication.
./nvscistream_event_sample -P 0 nvscic2c_pcie_s0_c5_1 -Q 0 f
# Run the command below on the OS running on the peer SoC.
./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -F 0 3
Multi-process CUDA/CUDA stream with four consumers, one in the same
process as the producer, one in another process but in the same OS as the
producer, and two in another process on another OS running on a peer SoC.
The third and fourth consumers have a mailbox queue attached to the C2C
IpcSrc block, and a five-packet pool attached to the C2C IpcDst block.
The third consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
C2C communication. The fourth consumer uses nvscic2c_pcie_s0_c5_2 <->
nvscic2c_pcie_s0_c6_2 for C2C communication.
./nvscistream_event_sample -m 4 -c 0 -q 0 m -Q 2 m -Q 3 m -P 2 nvscic2c_pcie_s0_c5_1 -P 3 nvscic2c_pcie_s0_c5_2 &
./nvscistream_event_sample -c 1 -q 1 m
# Run the command below on the OS running on the peer SoC.
./nvscistream_event_sample -C 2 nvscic2c_pcie_s0_c6_1 -q 2 f -F 2 5 -C 3 nvscic2c_pcie_s0_c6_2 -q 3 m -F 3 5

event_sample_app/Makefile (new file, 160 lines)

@@ -0,0 +1,160 @@
# Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Location of common libraries
LIB_DIR = /usr/lib/aarch64-linux-gnu
# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
# Location of NVSCI header
NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
NVCCFLAGS :=
CCFLAGS :=
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
# Debug build flags
dbg = 0
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
################################################################################
# Common includes and paths
INCLUDES := -I./
LIBRARIES := -L$(LIB_DIR)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda -lrt
endif
# Includes and paths for NVSCI libraries
NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null)
NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null)
NVSCISTREAMHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscistream.h -print 2>/dev/null)
NVSCIEVENTHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscievent.h -print 2>/dev/null)
NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null)
ifeq ("$(NVSCIBUFHEADER)","")
$(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISYNCHEADER)","")
$(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISTREAMHEADER)","")
$(error ERROR - nvscistream.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIEVENTHEADER)","")
$(error ERROR - nvscievent.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIIPCHEADER)","")
$(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR))
endif
INCLUDES += -I$(NVSCI_HEADER_DIR)
LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream
ALL_CCFLAGS += --std=c++11 --threads 0
# CUDA code generation flags
# Gencode arguments
SMS ?= 53 61 70 72 75 80 86 87
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
################################################################################
# Target rules
OUTPUT := nvscistream_event_sample
all: build
build: $(OUTPUT)
OBJ := main.o
OBJ += block_common.o
OBJ += block_c2c.o
OBJ += block_consumer_uc1.o
OBJ += block_ipc.o
OBJ += block_limiter.o
OBJ += block_multicast.o
OBJ += block_pool.o
OBJ += block_presentsync.o
OBJ += block_producer_uc1.o
OBJ += block_queue.o
OBJ += block_returnsync.o
OBJ += event_loop_service.o
OBJ += event_loop_threads.o
OBJ += util.o
%.o: %.c
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
$(OUTPUT): $(OBJ)
$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
run: build
$(OUTPUT)
testrun: build
clean:
rm -f $(OBJ) $(OUTPUT)
clobber: clean
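# Example invocations (illustrative values; dbg and SMS are the variables
# defined above):
#   make                     # release build for the default SM list
#   make dbg=1               # debug build (adds -g -G)
#   make SMS="87" clean all  # limit code generation to sm_87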


@@ -0,0 +1,116 @@
/* NvSciStream Event Loop Driven Sample App - c2c block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvsciipc.h"
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Create and register a new C2C src block */
int32_t createC2cSrc(
NvSciStreamBlock* c2cSrc,
const char* channel,
NvSciStreamBlock queue)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("C2cSrc", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for C2C src\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a C2C src block */
err = NvSciStreamIpcSrcCreate2(ipcEndpoint,
sciSyncModule,
sciBufModule,
queue,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create C2C src block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*c2cSrc = blockData->block;
return 1;
}
/* Create and register a new C2C dst block */
int32_t createC2cDst(
NvSciStreamBlock* c2cDst,
const char* channel,
NvSciStreamBlock pool)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("C2cDst", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for C2C dst\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a C2C dst block */
err = NvSciStreamIpcDstCreate2(ipcEndpoint,
sciSyncModule,
sciBufModule,
pool,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create C2C dst block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*c2cDst = blockData->block;
return 1;
}


@@ -0,0 +1,177 @@
/* NvSciStream Event Loop Driven Sample App - common block event handling
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* Block types which do not require type-specific interactions make use of
* this common code.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Delete common block */
void deleteCommon(
void* data)
{
BlockData* blockData = (BlockData*)data;
/* Destroy block */
if (blockData->block != 0) {
(void)NvSciStreamBlockDelete(blockData->block);
}
/* Free data */
free(blockData);
}
/* Handle events on a common block
*
* Blocks that don't require interaction after connection usually just
* receive notification of connection and disconnection.
*/
int32_t handleCommon(
void* data,
uint32_t wait)
{
/* Cast to common data */
BlockData* blockData = (BlockData*)data;
/* Get time to wait */
int64_t waitTime = wait ? blockData->waitTime : 0;
/* Query/wait for an event on the block */
NvSciStreamEventType event;
NvSciError err;
err = NvSciStreamBlockEventQuery(blockData->block, waitTime, &event);
/* Handle errors */
if (NvSciError_Success != err) {
/* If not asked to wait, a timeout is not an error */
if (!waitTime && (NvSciError_Timeout == err)) {
return 0;
}
/* Otherwise, any error is considered fatal. A timeout probably
* indicates a failure to connect and complete setup in a timely
* fashion, so we specifically call out this case.
*/
if (NvSciError_Timeout == err) {
printf("%s timed out waiting for setup instructions\n",
blockData->name);
} else {
printf("%s event query failed with error %x\n",
blockData->name, err);
}
blockData->deleteFunc(blockData);
return -1;
}
/* If we received an event, handle it based on its type */
int32_t rv = 1;
NvSciError status;
switch (event) {
/*
* Any event we don't explicitly handle is a fatal error
*/
default:
printf("%s received unknown event %x\n",
blockData->name, event);
rv = -1;
break;
/*
* Error events should never occur with safety-certified drivers,
* and are provided only in non-safety builds for debugging
* purposes. Even then, they should only occur when something
* fundamental goes wrong, like the system running out of memory,
* or stack/heap corruption, or a bug in NvSci which should be
* reported to NVIDIA.
*/
case NvSciStreamEventType_Error:
err = NvSciStreamBlockErrorGet(blockData->block, &status);
if (NvSciError_Success != err) {
printf("%s Failed to query the error event code %x\n",
blockData->name, err);
} else {
printf("%s received error event: %x\n",
blockData->name, status);
}
rv = -1;
break;
/*
* If told to disconnect, it means either the stream finished its
* business or some other block had a failure. We'll just do a
* clean up and return without an error.
*/
case NvSciStreamEventType_Disconnected:
rv = 2;
break;
/*
* The block doesn't have to do anything on connection, but now we may
* wait forever for any further events, so the timeout becomes infinite.
*/
case NvSciStreamEventType_Connected:
/* Query producer and consumer(s) endpoint info if needed */
blockData->waitTime = -1;
break;
/* All setup complete. Transition to runtime phase */
case NvSciStreamEventType_SetupComplete:
break;
}
/* On failure or final event, clean up the block */
if ((rv < 0) || (1 < rv)) {
blockData->deleteFunc(blockData);
}
return rv;
}
/* Create and register a new common block */
BlockData* createCommon(
char const* name,
size_t size)
{
/* If no size specified, just use BlockData */
if (0 == size) {
size = sizeof(BlockData);
}
/* Create a data structure to track the block's status */
BlockData* commonData = (BlockData*)calloc(1, size);
if (NULL == commonData) {
printf("Failed to allocate data structure for %s\n", name);
return NULL;
}
/* Save the name for debugging purposes */
strcpy(commonData->name, name);
/* Wait time for initial connection event will be 60 seconds */
commonData->waitTime = 60 * 1000000;
/* Use the common delete function */
commonData->deleteFunc = deleteCommon;
return commonData;
}
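/* The sketch below is illustrative, not part of the sample: it shows how
 * the size parameter of createCommon() lets a block type embed BlockData
 * as its first member and still reuse the common create/delete path.
 * The common handlers cast the private data to BlockData*, so the
 * embedded member must come first.
 */
typedef struct {
    BlockData common;    /* must be the first member */
    uint32_t  myCounter; /* hypothetical block-specific state */
} ExampleBlockData;

static ExampleBlockData* createExampleBlock(void)
{
    /* createCommon zero-fills the whole structure, stores the name,
     * sets the 60 second connection timeout, and installs deleteCommon.
     * A block holding extra resources would override deleteFunc after
     * this call.
     */
    return (ExampleBlockData*)createCommon("Example",
                                           sizeof(ExampleBlockData));
}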

File diff suppressed because it is too large.


@@ -0,0 +1,153 @@
/* NvSciStream Event Loop Driven Sample App - block abstraction
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef _BLOCK_INFO_H
#define _BLOCK_INFO_H 1
#include "nvscistream.h"
/* Maximum number of consumers */
#define MAX_CONSUMERS 4
/* Maximum number of packets supported */
#define MAX_PACKETS 32
/* Maximum number of elements supported */
#define MAX_ELEMS 4
/* Memory size of endpoint information */
#define INFO_SIZE 50
/* NvSci modules for all objects */
extern NvSciSyncModule sciSyncModule;
extern NvSciBufModule sciBufModule;
/* NvSciIpc Endpoint */
extern NvSciIpcEndpoint ipcEndpoint;
/* Common options for all blocks */
typedef struct {
bool endInfo;
bool yuv;
} CommonOptions;
extern CommonOptions opts;
/* Structure to track packet element attributes */
typedef struct {
/* The application's name for the element */
uint32_t userName;
/* Attribute list for element */
NvSciBufAttrList attrList;
} ElemAttr;
/*
* Some block types that do not require direct interaction will share a
* common private data structure and event handling function.
*/
/* Common block private data */
typedef struct {
NvSciStreamBlock block;
int64_t waitTime;
char name[32];
void (*deleteFunc)(void*);
} BlockData;
/* Create data structure for common blocks */
extern BlockData* createCommon(
char const* name,
size_t size);
/* Handle event for common block */
extern int32_t handleCommon(
void* data,
uint32_t wait);
/* Delete common block */
extern void deleteCommon(
void* data);
/*
* Functions for setting up each kind of block
*/
extern int32_t createIpcDst(
NvSciStreamBlock* ipcDst,
const char* channel);
extern int32_t createIpcSrc(
NvSciStreamBlock* ipcSrc,
const char* channel);
extern int32_t createC2cDst(
NvSciStreamBlock* c2cDst,
const char* channel,
NvSciStreamBlock pool);
extern int32_t createC2cSrc(
NvSciStreamBlock* c2cSrc,
const char* channel,
NvSciStreamBlock queue);
extern int32_t createLimiter(
NvSciStreamBlock* limiter,
uint32_t limit);
extern int32_t createPresentSync(
NvSciStreamBlock* presentSync);
extern int32_t createReturnSync(
NvSciStreamBlock* returnSync);
extern int32_t createMulticast(
NvSciStreamBlock* multicast,
uint32_t numConsumer);
extern int32_t createPool(
NvSciStreamBlock* pool,
uint32_t numPacket,
bool isC2cPool);
extern int32_t createQueue(
NvSciStreamBlock* queue,
uint32_t useMailbox);
extern int32_t (*createProducer)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool);
extern int32_t (*createConsumer)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index);
extern int32_t (createProducer_Usecase1)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool);
extern int32_t (createConsumer_Usecase1)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index);
extern int32_t (createProducer_Usecase2)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool);
extern int32_t (createConsumer_Usecase2)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index);
#endif // _BLOCK_INFO_H
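As a sketch of how these creation functions fit together (illustrative
only: error handling is omitted, and NvSciStreamBlockConnect is the
public NvSciStream call for joining an upstream block to a downstream
one):

/* Hypothetical minimal single-process stream:
 * producer -> pool on one end, FIFO queue -> consumer on the other,
 * then connect the two endpoints.
 */
static int32_t buildMinimalStream(void)
{
    NvSciStreamBlock producer, consumer, pool, queue;
    if (!createPool(&pool, 3U, false) ||
        !createQueue(&queue, 0U /* FIFO */) ||
        !createProducer(&producer, pool) ||
        !createConsumer(&consumer, queue, 0U)) {
        return 0;
    }
    return (NvSciError_Success ==
            NvSciStreamBlockConnect(producer, consumer)) ? 1 : 0;
}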


@@ -0,0 +1,120 @@
/* NvSciStream Event Loop Driven Sample App - ipc blocks
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvsciipc.h"
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
 * Ipc blocks do not require any block-specific interactions, so we
 * use the set of common functions to handle their events. The
 * NvSciIpc endpoint they open is stored in the shared global declared
 * in block_info.h rather than in block-specific data.
 */
/* Create and register a new ipcsrc block */
int32_t createIpcSrc(
NvSciStreamBlock* ipcsrc,
const char* channel)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("IpcSrc", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for IpcSrc\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a ipcsrc block */
err = NvSciStreamIpcSrcCreate(ipcEndpoint,
sciSyncModule,
sciBufModule,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create IpcSrc block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*ipcsrc = blockData->block;
return 1;
}
/* Create and register a new ipcdst block */
int32_t createIpcDst(
NvSciStreamBlock* ipcdst,
const char* channel)
{
NvSciError err;
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("IpcDst", 0);
if (NULL == blockData) {
return 0;
}
/* Open the named channel */
err = NvSciIpcOpenEndpoint(channel, &ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to open channel (%s) for IpcDst\n",
err, channel);
deleteCommon(blockData);
return 0;
}
err = NvSciIpcResetEndpointSafe(ipcEndpoint);
if (NvSciError_Success != err) {
printf("Failed (%x) to reset IPC endpoint", err);
}
/* Create a ipcdst block */
err = NvSciStreamIpcDstCreate(ipcEndpoint,
sciSyncModule,
sciBufModule,
&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create IpcDst block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*ipcdst = blockData->block;
return 1;
}
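/* Usage sketch, illustrative only: each process runs its half of the
 * pairing below. The endpoint names are placeholders; real names come
 * from the NvSciIpc configuration (see the README examples).
 */
static int32_t openIpcPairExample(bool isProducerProcess)
{
    NvSciStreamBlock block;
    return isProducerProcess
        ? createIpcSrc(&block, "example_ipc_tx")  /* producer side */
        : createIpcDst(&block, "example_ipc_rx"); /* consumer side */
}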


@@ -0,0 +1,52 @@
/* NvSciStream Event Loop Driven Sample App - limiter block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Limiter blocks do not require any block-specific interactions, so we
* use the set of common functions to handle their events.
*/
/* Create and register a new limiter block */
int32_t createLimiter(
NvSciStreamBlock* limiter,
uint32_t limit)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("Limiter", 0);
if (NULL == blockData) {
return 0;
}
/* Create a limiter block */
NvSciError err =
NvSciStreamLimiterCreate(limit, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create limiter block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*limiter = blockData->block;
return 1;
}


@@ -0,0 +1,53 @@
/* NvSciStream Event Loop Driven Sample App - multicast block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Multicast blocks do not require any block-specific interactions, so we
* use the set of common functions to handle their events.
*/
/* Create and register a new multicast block */
int32_t createMulticast(
NvSciStreamBlock* multicast,
uint32_t numConsumer)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("Multicast", 0);
if (NULL == blockData) {
return 0;
}
/* Create a multicast block */
NvSciError err =
NvSciStreamMulticastCreate(numConsumer, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create limiter block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*multicast = blockData->block;
return 1;
}


@@ -0,0 +1,731 @@
/* NvSciStream Event Loop Driven Sample App - pool block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/* Internal data used by the pool block */
typedef struct {
BlockData common;
bool isC2cPool;
uint32_t numConsumers;
uint32_t numProdElem;
uint32_t numConsElem;
bool elementsDone;
ElemAttr prodElem[MAX_ELEMS];
ElemAttr consElem[MAX_ELEMS];
uint32_t numPacket;
uint32_t numPacketReady;
bool packetsDone;
NvSciStreamPacket packet[MAX_PACKETS];
} PoolData;
/* Free up pool block resources */
static void deletePool(
PoolData* poolData)
{
/* Destroy block */
if (poolData->common.block != 0) {
(void)NvSciStreamBlockDelete(poolData->common.block);
}
/* Free data */
free(poolData);
}
/* Handle query of basic stream info */
static int32_t handleStreamInit(
PoolData* poolData)
{
/* Query number of consumers */
NvSciError err =
NvSciStreamBlockConsumerCountGet(poolData->common.block,
&poolData->numConsumers);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query number of consumers", err);
return 0;
}
/* Query producer and consumer(s) endpoint info if needed */
return 1;
}
/*
* If ready, handle setup of the pool buffers.
*
* Most of the work the pool application has to do resides in this function.
*/
static int32_t handlePoolBufferSetup(
PoolData* poolData)
{
NvSciError err;
/* Query producer element count */
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Producer,
&poolData->numProdElem);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query producer element count\n", err);
return 0;
}
/* Query consumer element count */
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Consumer,
&poolData->numConsElem);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query consumer element count\n", err);
return 0;
}
/* Query all producer elements */
for (uint32_t i=0U; i<poolData->numProdElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Producer, i,
&poolData->prodElem[i].userName,
&poolData->prodElem[i].attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query producer element %d\n", err, i);
return 0;
}
}
/* Query all consumer elements */
for (uint32_t i=0U; i<poolData->numConsElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Consumer, i,
&poolData->consElem[i].userName,
&poolData->consElem[i].attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to query consumer element %d\n", err, i);
return 0;
}
}
/* Indicate that all element information has been imported */
poolData->elementsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementImport,
true);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to complete element import\n", err);
return 0;
}
/*
* Go through requested elements from producer and consumer and line
* them up. A general streaming application might not have a one to
* one correspondence, and the pool may have to decide what subset
* of elements to select based on knowledge of the data types that
* the application suite supports. This sample application is much
* simpler, but we still go through the process rather than assuming
* producer and consumer have requested the same things in the same
* order.
*/
uint32_t numElem = 0, p, c, e, i;
ElemAttr elem[MAX_ELEMS];
for (p=0; p<poolData->numProdElem; ++p) {
ElemAttr* prodElem = &poolData->prodElem[p];
for (c=0; c<poolData->numConsElem; ++c) {
ElemAttr* consElem = &poolData->consElem[c];
/* If requested element types match, combine the entries */
if (prodElem->userName == consElem->userName) {
ElemAttr* poolElem = &elem[numElem++];
poolElem->userName = prodElem->userName;
poolElem->attrList = NULL;
/* Combine and reconcile the attribute lists */
NvSciBufAttrList oldAttrList[2] = { prodElem->attrList,
consElem->attrList };
NvSciBufAttrList conflicts = NULL;
err = NvSciBufAttrListReconcile(oldAttrList, 2,
&poolElem->attrList,
&conflicts);
/* Discard any conflict list.
* (Could report its contents for additional debug info)
*/
if (NULL != conflicts) {
NvSciBufAttrListFree(conflicts);
}
/* Abort on error */
if (NvSciError_Success != err) {
printf("Failed to reconcile element %x attrs (%x)\n",
poolElem->userName, err);
return 0;
}
/* Found a match for this producer element so move on */
break;
} /* if match */
} /* for all requested consumer elements */
} /* for all requested producer elements */
/* Should be at least one element */
if (0 == numElem) {
printf("Pool didn't find any common elements\n");
return 0;
}
/* The requested attribute lists are no longer needed, so discard them */
for (p=0; p<poolData->numProdElem; ++p) {
ElemAttr* prodElem = &poolData->prodElem[p];
if (NULL != prodElem->attrList) {
NvSciBufAttrListFree(prodElem->attrList);
prodElem->attrList = NULL;
}
}
for (c=0; c<poolData->numConsElem; ++c) {
ElemAttr* consElem = &poolData->consElem[c];
if (NULL != consElem->attrList) {
NvSciBufAttrListFree(consElem->attrList);
consElem->attrList = NULL;
}
}
/* Inform the stream of the chosen elements */
for (e=0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
err = NvSciStreamBlockElementAttrSet(poolData->common.block,
poolElem->userName,
poolElem->attrList);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to send element %d info\n", err, e);
return 0;
}
}
/* Indicate that all element information has been exported */
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementExport,
true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to complete element export\n", err);
return 0;
}
/*
* Create and send all the packets and their buffers
* Note: Packets and buffers are not guaranteed to be received by
* producer and consumer in the same order sent, nor are the
* status messages sent back guaranteed to preserve ordering.
* This is one reason why an event driven model is more robust.
*/
for (i=0; i<poolData->numPacket; ++i) {
/*
* Create a new packet
* Our pool implementation doesn't need to save any packet-specific
* data, but we do need to provide unique cookies, so we just
* use the pointer to the location we save the handle. For other
* blocks, this will be a pointer to the structure where the
* packet information is kept.
*/
NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
err = NvSciStreamPoolPacketCreate(poolData->common.block,
cookie,
&poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to create packet %d\n", err, i);
return 0;
}
/* Create buffers for the packet */
for (e=0; e<numElem; ++e) {
/* Allocate a buffer object */
NvSciBufObj obj;
err = NvSciBufObjAlloc(elem[e].attrList, &obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to allocate buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* Insert the buffer in the packet */
err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
poolData->packet[i],
e, obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to insert buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* The pool doesn't need to keep a copy of the object handle */
NvSciBufObjFree(obj);
}
/* Indicate packet setup is complete */
err = NvSciStreamPoolPacketComplete(poolData->common.block,
poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet %d setup\n",
err, i);
return 0;
}
}
/*
* Indicate that all packets have been sent.
* Note: An application could choose to wait to send this until
* the status has been received, in order to try to make any
* corrections for rejected packets.
*/
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketExport,
true);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet export\n",
err);
return 0;
}
/* Once all packets are set up, no longer need to keep the attributes */
for (e=0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
if (NULL != poolElem->attrList) {
NvSciBufAttrListFree(poolElem->attrList);
poolElem->attrList = NULL;
}
}
return 1;
}
/*
* If ready, handle setup of the C2C pool buffers.
*
* Most of the work the pool application has to do resides in this function.
*/
static int32_t handleC2cPoolBufferSetup(
PoolData* poolData)
{
NvSciError err;
/* Query allocated element count from the primary pool */
uint32_t numElem;
err = NvSciStreamBlockElementCountGet(poolData->common.block,
NvSciStreamBlockType_Producer,
&numElem);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to query allocated element count\n",
err);
return 0;
}
/* Query all allocated elements from the primary pool */
ElemAttr elem[MAX_ELEMS];
for (uint32_t i = 0U; i<numElem; ++i) {
err = NvSciStreamBlockElementAttrGet(poolData->common.block,
NvSciStreamBlockType_Producer, i,
&elem[i].userName,
&elem[i].attrList);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to query allocated element %d\n",
err, i);
return 0;
}
}
/* If necessary, query the consumer elements for validation */
/* Indicate that all element information has been imported */
poolData->elementsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_ElementImport,
true);
if (NvSciError_Success != err) {
printf("C2C pool failed (%x) to complete element import\n", err);
return 0;
}
/*
* Create and send all the packets and their buffers
*/
for (uint32_t i = 0; i<poolData->numPacket; ++i) {
/*
* Create a new packet
* Our pool implementation doesn't need to save any packet-specific
* data, but we do need to provide unique cookies, so we just
* use the pointer to the location we save the handle. For other
* blocks, this will be a pointer to the structure where the
* packet information is kept.
*/
NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
err = NvSciStreamPoolPacketCreate(poolData->common.block,
cookie,
&poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to create packet %d\n", err, i);
return 0;
}
/* Create buffers for the packet */
for (uint32_t e = 0; e<numElem; ++e) {
/* Allocate a buffer object */
NvSciBufObj obj;
err = NvSciBufObjAlloc(elem[e].attrList, &obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to allocate buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* Insert the buffer in the packet */
err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
poolData->packet[i],
e, obj);
if (NvSciError_Success != err) {
printf("Failed (%x) to insert buffer %d of packet %d\n",
err, e, i);
return 0;
}
/* The pool doesn't need to keep a copy of the object handle */
NvSciBufObjFree(obj);
}
/* Indicate packet setup is complete */
err = NvSciStreamPoolPacketComplete(poolData->common.block,
poolData->packet[i]);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet %d setup\n",
err, i);
return 0;
}
}
/*
* Indicate that all packets have been sent.
* Note: An application could choose to wait to send this until
* the status has been received, in order to try to make any
* corrections for rejected packets.
*/
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketExport,
true);
if (NvSciError_Success != err) {
printf("Failed (%x) to complete packet export\n",
err);
return 0;
}
/* Once all packets are set up, no longer need to keep the attributes */
for (uint32_t e = 0; e<numElem; ++e) {
ElemAttr* poolElem = &elem[e];
if (NULL != poolElem->attrList) {
NvSciBufAttrListFree(poolElem->attrList);
poolElem->attrList = NULL;
}
}
return 1;
}
/* Check packet status */
static int32_t handlePacketsStatus(
PoolData* poolData)
{
bool packetFailure = false;
NvSciError err;
/* Check each packet */
for (uint32_t p = 0; p < poolData->numPacket; ++p) {
/* Check packet acceptance */
bool accept;
err = NvSciStreamPoolPacketStatusAcceptGet(poolData->common.block,
poolData->packet[p],
&accept);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's acceptance-statue\n",
err, p);
return 0;
}
if (accept) {
continue;
}
/* On rejection, query and report details */
packetFailure = true;
NvSciError status;
/* Check packet status from producer */
err = NvSciStreamPoolPacketStatusValueGet(
poolData->common.block,
poolData->packet[p],
NvSciStreamBlockType_Producer, 0U,
&status);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's statue from producer\n",
err, p);
return 0;
}
if (status != NvSciError_Success) {
printf("Producer rejected packet %d with error %x\n", p, status);
}
/* Check packet status from consumers */
for (uint32_t c = 0; c < poolData->numConsumers; ++c) {
err = NvSciStreamPoolPacketStatusValueGet(
poolData->common.block,
poolData->packet[p],
NvSciStreamBlockType_Consumer, c,
&status);
if (NvSciError_Success != err) {
printf("Failed (%x) to retrieve packet %d's statue from consumer %d\n",
err, p, c);
return 0;
}
if (status != NvSciError_Success) {
printf("Consumer %d rejected packet %d with error %x\n",
c, p, status);
}
}
}
/* Indicate that status for all packets has been received. */
poolData->packetsDone = true;
err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
NvSciStreamSetup_PacketImport,
true);
if (NvSciError_Success != err) {
printf("Pool failed (%x) to complete packets export\n", err);
return 0;
}
return packetFailure ? 0 : 1;
}
/* Handle events on a pool block
*
* The pool block coordinates allocation of packets based on producer
* and consumer requirements during setup. After that, no further
* events should be received until the stream is torn down.
*/
static int32_t handlePool(
void* data,
uint32_t wait)
{
/* Cast to pool data */
PoolData* poolData = (PoolData*)data;
/* Get time to wait */
int64_t waitTime = wait ? poolData->common.waitTime : 0;
/* Query/wait for an event on the block */
NvSciStreamEventType event;
NvSciError err;
err = NvSciStreamBlockEventQuery(poolData->common.block, waitTime, &event);
/* Handle errors */
if (NvSciError_Success != err) {
/* If not asked to wait, a timeout is not an error */
if (!waitTime && (NvSciError_Timeout == err)) {
return 0;
}
/* Otherwise, any error is considered fatal. A timeout probably
* indicates a failure to connect and complete setup in a timely
* fashion, so we specifically call out this case.
*/
if (NvSciError_Timeout == err) {
printf("Pool timed out waiting for setup instructions\n");
} else {
printf("Pool event query failed with error %x\n", err);
}
deletePool(poolData);
return -1;
}
/* If we received an event, handle it based on its type
*
* Note that there's a lot of error checking we could choose to do for
* some of these events, like making sure that we only receive each
* event once for a given entry. But NvSciStream is expected to take
* care of all of that, even when the application makes a mistake.
* So we only check for things that don't trigger NvSciStream errors.
*/
int32_t rv = 1;
NvSciError status;
switch (event) {
/*
* Any event we don't explicitly handle is a fatal error
*/
default:
printf("Pool received unknown event %x\n", event);
rv = -1;
break;
/*
* Error events should never occur with safety-certified drivers,
* and are provided only in non-safety builds for debugging
* purposes. Even then, they should only occur when something
* fundamental goes wrong, like the system running out of memory,
* or stack/heap corruption, or a bug in NvSci which should be
* reported to NVIDIA.
*/
case NvSciStreamEventType_Error:
err = NvSciStreamBlockErrorGet(poolData->common.block, &status);
if (NvSciError_Success != err) {
printf("%s Failed to query the error event code %x\n",
poolData->common.name, err);
} else {
printf("%s received error event: %x\n",
poolData->common.name, status);
}
rv = -1;
break;
/*
* If told to disconnect, it means either the stream finished its
* business or some other block had a failure. We'll just do a
* clean up and return without an error. But if it happened before
* all the pool setup operations finished, we'll report it for
* debugging purposes.
*/
case NvSciStreamEventType_Disconnected:
if (!poolData->elementsDone) {
printf("Warning: Pool disconnect before element support\n");
} else if (!poolData->packetsDone) {
printf("Warning: Pool disconnect before packet setup\n");
}
rv = 2;
break;
/*
* The pool doesn't have to do anything immediately on connection, but
* now that the stream is complete we can reduce the timeout to wait
* for the producer and consumer events to arrive.
*/
case NvSciStreamEventType_Connected:
/* Initialize basic stream info */
if (!handleStreamInit(poolData)) {
rv = -1;
}
poolData->common.waitTime = 10 * 1000000;
break;
/* Process all element support from producer and consumer(s) */
case NvSciStreamEventType_Elements:
if (poolData->isC2cPool) {
if (!handleC2cPoolBufferSetup(poolData)) {
rv = -1;
}
} else {
if (!handlePoolBufferSetup(poolData)) {
rv = -1;
}
}
break;
/*
* Check packet/buffer status returned from producer/consumer
* A more sophisticated application might have the means to recover
* from any failures. But in general we expect that in a production
* application, any failures are due to something fundamental going
* wrong like lack of memory/resources, which hopefully has been
* designed out. So these status checks are more useful during
* development, where we just report the issue for debugging purposes.
*
* Once all the status events have been received for all packets
* and buffers, the pool should require no further interaction
* until the time comes to shut down the application. We set the
* wait time to infinite.
*/
case NvSciStreamEventType_PacketStatus:
/* There are multiple ways the status handling could be organized.
* In particular, waiting for status could be interleaved with
* sending the packets. This example waits for status from all
* packets before checking each packet's status.
*/
if (++poolData->numPacketReady < poolData->numPacket) {
break;
}
if (!handlePacketsStatus(poolData)) {
rv = -1;
}
poolData->common.waitTime = -1;
break;
/* All setup complete. Transition to runtime phase */
case NvSciStreamEventType_SetupComplete:
break;
}
/* On failure or final event, clean up the block */
if ((rv < 0) || (1 < rv)) {
deletePool(poolData);
}
return rv;
}
/* Create and register a new pool block */
int32_t createPool(
NvSciStreamBlock* pool,
uint32_t numPacket,
bool isC2cPool)
{
/* Create a data structure to track the block's status */
PoolData* poolData = (PoolData*)calloc(1, sizeof(PoolData));
if (NULL == poolData) {
printf("Failed to allocate data structure for pool\n");
return 0;
}
/* Save the name for debugging purposes */
strcpy(poolData->common.name, "Pool");
/* Save the c2c pool flag */
poolData->isC2cPool = isC2cPool;
/* Save the packet count */
poolData->numPacket = numPacket;
/* Wait time for initial connection event will be 60 seconds */
poolData->common.waitTime = 60 * 1000000;
/* Create a pool block */
NvSciError err =
NvSciStreamStaticPoolCreate(poolData->numPacket,
&poolData->common.block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create pool block\n", err);
deletePool(poolData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(poolData->common.block, poolData, handlePool)) {
deletePool(poolData);
return 0;
}
*pool = poolData->common.block;
return 1;
}


@@ -0,0 +1,51 @@
/* NvSciStream Event Loop Driven Sample App - PresentSync block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* PresentSync blocks do not require any block-specific interactions, so we
* use the set of common functions to handle their events.
*/
/* Create and register a new presentSync block */
int32_t createPresentSync(
NvSciStreamBlock* presentSync)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("PresentSync", 0);
if (NULL == blockData) {
return 0;
}
/* Create a PresentSync block */
NvSciError err =
NvSciStreamPresentSyncCreate(sciSyncModule, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create PresentSync block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*presentSync = blockData->block;
return 1;
}

File diff suppressed because it is too large.


@@ -0,0 +1,53 @@
/* NvSciStream Event Loop Driven Sample App - queue block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* Queue blocks do not require any block-specific interactions, so we
* use the set of common functions to handle their events.
*/
/* Create and register a new queue block */
int32_t createQueue(
NvSciStreamBlock* queue,
uint32_t useMailbox)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon(useMailbox ? "Mailbox" : "FIFO", 0);
if (NULL == blockData) {
return 0;
}
/* Create a queue block */
NvSciError err = useMailbox
? NvSciStreamMailboxQueueCreate(&blockData->block)
: NvSciStreamFifoQueueCreate(&blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create queue block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*queue = blockData->block;
return 1;
}


@@ -0,0 +1,51 @@
/* NvSciStream Event Loop Driven Sample App - ReturnSync block
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <stdlib.h>
#include <stdio.h>
#include "nvscistream.h"
#include "block_info.h"
#include "event_loop.h"
/*
* ReturnSync blocks do not require any block-specific interactions, so we
* use the set of common functions to handle their events.
*/
/* Create and register a new returnSync block */
int32_t createReturnSync(
NvSciStreamBlock* returnSync)
{
/* Create a data structure to track the block's status */
BlockData* blockData = createCommon("ReturnSync", 0);
if (NULL == blockData) {
return 0;
}
/* Create a ReturnSync block */
NvSciError err =
NvSciStreamReturnSyncCreate(sciSyncModule, &blockData->block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create ReturnSync block\n", err);
deleteCommon(blockData);
return 0;
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
deleteCommon(blockData);
return 0;
}
*returnSync = blockData->block;
return 1;
}


@@ -0,0 +1,46 @@
/* NvSciStream Event Loop Driven Sample App - event handler abstraction
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef _EVENT_LOOP_H
#define _EVENT_LOOP_H 1
#include <stdint.h>
#include "nvscistream.h"
/*
* Block event handler function.
* Input:
* data: The block's type-specific private data structure
* wait: Flag indicating whether to wait for an event
* Returns:
* +2: Block has completed its last operation and will be destroyed
* +1: An event was found and processed
* 0: No event was found (not an error)
* -1: Block has encountered a fatal error and will be destroyed
*/
typedef int32_t (*BlockFunc)(void* data, uint32_t wait);
/* Table of events to abstract the two approaches for event loops */
typedef struct {
int32_t (*init)(void);
int32_t (*reg)(NvSciStreamBlock, void*, BlockFunc);
int32_t (*loop)(void);
} EventFuncs;
/* Chosen event function table */
extern EventFuncs const* eventFuncs;
/* Event tables for the two methods */
extern EventFuncs const eventFuncs_Service;
extern EventFuncs const eventFuncs_Threads;
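/*
 * A minimal usage sketch (illustrative only; main.c is the real driver):
 * the application picks one of the two tables, initializes it, registers
 * each block with its handler, and runs the loop to completion.
 *
 *     eventFuncs = useThreads ? &eventFuncs_Threads : &eventFuncs_Service;
 *     if (!eventFuncs->init()) { ... }
 *     eventFuncs->reg(block, blockData, handleCommon);
 *     int32_t ret = eventFuncs->loop();
 *
 * Here useThreads, block, and blockData are placeholders standing in for
 * the -e command line option and a registered block's state.
 */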
#endif // _EVENT_LOOP_H


@@ -0,0 +1,249 @@
/* NvSciStream Event Loop Driven Sample App - service-based event handling
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* This file implements the option to handle events for all blocks
* through an event service. Each block adds an event notifier to
* a list. That notifier will be signaled when an event is ready
* on the block. A single main loop waits for one or more of the
* notifiers to trigger, processes events on the corresponding
* blocks, and goes back to waiting. When all blocks have been
* destroyed either due to failure or all payloads being processed,
* the loop exits and the function returns.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#if (QNX == 1)
#include <sys/neutrino.h>
#endif
#include "nvscievent.h"
#include "event_loop.h"
/* Event service */
static NvSciEventLoopService* service = NULL;
/* Structure to track block info */
typedef struct {
NvSciStreamBlock handle;
void* data;
BlockFunc func;
NvSciEventNotifier* notifier;
} BlockEventData;
/* List of blocks */
#define MAX_BLOCKS 100
static int32_t numBlocks = 0;
static BlockEventData blocks[MAX_BLOCKS];
static uint32_t success = 1U;
/* Initialize service-based event handling */
static int32_t eventServiceInit(void)
{
/*
* The OS configuration should be NULL for Linux and should
* have a valid configuration for QNX.
* See NvSciEventLoopServiceCreateSafe API Specification for more
* information.
*/
void *osConfig = NULL;
#if (QNX == 1)
struct nto_channel_config config = {0};
/*
 * The number of pulses can be calculated from the number of
 * notifiers bound to the event service, the number of packets, and
 * the number of events handled by each block:
 * (num_of_pulses = num_of_notifiers * 4 + \
 *                  (num_packets + 5) * num_of_endpoints)
 * If you experience pulse pool shortages in normal operation, increase
 * the number of pulses.
* If there are no available pulses in the pool, SIGKILL is delivered
* by default. You may configure the sigevent that you want to be
* delivered when a pulse can't be obtained from the pool.
*
* See NvSciEventLoopServiceCreateSafe API Specification for more
* information.
*/
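/*
 * A worked example, assuming the formula above: with 10 notifiers,
 * 3 packets, and 2 IPC endpoints,
 * num_of_pulses = 10 * 4 + (3 + 5) * 2 = 56,
 * so the value of 100 set below leaves comfortable headroom.
 */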
/* The num_pulses set below is just an example number and should be
* adjusted depending on the use case.
*/
config.num_pulses = 100U;
config.rearm_threshold = 0;
osConfig = &config;
#endif
/* Create event loop service */
NvSciError err = NvSciEventLoopServiceCreateSafe(1U, osConfig, &service);
if (NvSciError_Success != err) {
printf("Failed (%x) to create event service\n", err);
return 0;
}
return 1;
}
/* Register a new block with the event management */
static int32_t eventServiceRegister(
NvSciStreamBlock blockHandle,
void* blockData,
BlockFunc blockFunc)
{
/* Sanity check to make sure we left room for enough blocks */
if (numBlocks >= MAX_BLOCKS) {
printf("Exceeded maximum number of blocks\n");
return 0;
}
/* Grab the next entry in the list for the new block and fill it in */
BlockEventData* entry = &blocks[numBlocks++];
entry->handle = blockHandle;
entry->data = blockData;
entry->func = blockFunc;
/* Create a notifier for events on this block */
NvSciError err =
NvSciStreamBlockEventServiceSetup(entry->handle,
&service->EventService,
&entry->notifier);
if (NvSciError_Success != err) {
printf("Failed (%x) to create event notifier for block\n", err);
return 0;
}
return 1;
}
/* Main service-based event loop */
static int32_t eventServiceLoop(void)
{
int32_t i;
/*
 * Notes on handling notifications:
* If more than one signal occurs on a notifier in between calls
* to check for events, then NvSciEvent will squash the notifications,
* so only one is received. This means the application must drain
* all pending events on a block after its notifier signals. It won't
* receive new notifications for those pending events.
* A simple implementation might process each block's events in a loop
* until there are no more, and then move on to the next block. But
* this poses a risk of starvation. Consider the case of a stream in
* mailbox mode, where the mailbox already has a waiting payload.
* If the producer receives a PacketReady event, it will obtain
* the packet, fill it with data, and present it to the stream.
* Because the mailbox is full, the packet will immediately be
* returned, resulting in a new PacketReady event. The application
* can go into an infinite loop, generating new payloads on the
* producer without giving the consumer a chance to process them.
* We therefore use an event loop that only processes one event
* per block for each iteration, but keeps track of whether there
* was an event on a block for the previous pass, and if so
* retries it even if no new signal occurred. The event loop
* waits for events only when there was no prior event. Otherwise
* it only polls for new ones.
*/
/* Pack all notifiers into an array */
NvSciEventNotifier* notifiers[MAX_BLOCKS];
for (i=0; i<numBlocks; ++i) {
notifiers[i] = blocks[i].notifier;
}
/* Initialize loop control parameters */
uint32_t numAlive = numBlocks;
int64_t timeout = -1;
bool retry[MAX_BLOCKS];
bool event[MAX_BLOCKS];
memset(retry, 0, sizeof(retry));
/* Main loop - Handle events until all blocks report completion or fail */
while (numAlive) {
/* Wait/poll for events, depending on current timeout */
memset(event, 0, sizeof(event));
NvSciError err = service->WaitForMultipleEventsExt(
&service->EventService,
notifiers,
numBlocks,
timeout,
event);
if ((NvSciError_Success != err) && (NvSciError_Timeout != err)) {
printf("Failure (%x) while waiting/polling event service\n", err);
return 0;
}
/* Timeout for next pass will be infinite unless we need to retry */
timeout = -1;
/*
* Check for events on new blocks that signaled or old blocks that
* had an event on the previous pass. This is done in reverse
* of the order in which blocks were registered. This is because
* producers are created before consumers, and for mailbox mode
* we want to give the consumer a chance to use payloads before
* the producer replaces them.
*/
for (i=numBlocks-1; i>=0; --i) {
if (event[i] || retry[i]) {
/* Get block info */
BlockEventData* entry = &blocks[i];
/* Reset to no retry for next pass */
retry[i] = false;
/* Skip if this block is no longer in use */
if (entry->data) {
/* Call the block's event handler function */
int32_t rv = entry->func(entry->data, 0);
if (rv < 0) {
/* On failure, no longer check block and app failed */
success = 0U;
entry->data = NULL;
numAlive--;
} else if (rv == 2) {
/* On completion, no longer check block */
entry->data = NULL;
numAlive--;
} else if (rv == 1) {
/* If event found, retry next loop */
timeout = 0;
retry[i] = true;
}
}
}
}
}
/* Delete notifiers */
for (i=0; i<numBlocks; ++i) {
notifiers[i]->Delete(notifiers[i]);
}
/* Delete service */
service->EventService.Delete(&service->EventService);
return success;
}
/* Table of functions for service-based event handling */
EventFuncs const eventFuncs_Service = {
.init = eventServiceInit,
.reg = eventServiceRegister,
.loop = eventServiceLoop
};


@@ -0,0 +1,126 @@
/* NvSciStream Event Loop Driven Sample App - thread-based event handling
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* This file implements the option to handle events for each block in
* a separate thread. Each thread just has a loop that waits for a
* block event to occur and handles it appropriately, until the block
* has performed all required actions or receives notification that
* the stream has disconnected.
*
* In practice, only a few block types (producer, consumer, and pool)
* receive any events that need to be handled. So a more streamlined
* application might choose to only monitor them, assuming that the
* other blocks can be left alone until the time comes to tear them
* down.
*
* Note: We use standard pthread functions here because it allows this
* sample to run on all operating systems. QNX has its own thread
* management functions which might be more efficient when using
* this approach.
*/
#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include "event_loop.h"
/* Structure to track block info */
typedef struct {
NvSciStreamBlock handle;
void* data;
BlockFunc func;
pthread_t thread;
} BlockEventData;
/* List of blocks */
#define MAX_BLOCKS 100U
static uint32_t numBlocks = 0U;
static BlockEventData blocks[MAX_BLOCKS];
static uint32_t success = 1U;
/* The per-thread loop function for each block */
static void* eventThreadFunc(void* arg)
{
/* Simple loop, waiting for events on the block until the block is done */
BlockEventData* entry = (BlockEventData*)arg;
while (1) {
int32_t rv = entry->func(entry->data, 1);
if (rv < 0) {
success = 0U;
break;
} else if (rv == 2) {
break;
}
}
return NULL;
}
/* Initialize per-thread event handling */
static int32_t eventThreadInit(void)
{
/* No special initialization required for this method */
return 1;
}
/* Register a new block with the event management */
static int32_t eventThreadRegister(
NvSciStreamBlock blockHandle,
void* blockData,
BlockFunc blockFunc)
{
/* Sanity check to make sure we left room for enough blocks */
if (numBlocks >= MAX_BLOCKS) {
printf("Exceeded maximum number of blocks\n");
return 0;
}
/* Grab the next entry in the list for the new block and fill it in */
BlockEventData* entry = &blocks[numBlocks++];
entry->handle = blockHandle;
entry->data = blockData;
entry->func = blockFunc;
/* Spawn a thread */
int32_t rv = pthread_create(&entry->thread,
NULL,
eventThreadFunc,
(void*)entry);
if (rv != 0) {
printf("Failed to spawn thread to monitor block\n");
return 0;
}
return 1;
}
/* Main per-thread event loop */
static int32_t eventThreadLoop(void)
{
/*
* Each block has its own thread loop. This main function just needs
* to wait for all of them to exit, and then return any error. This
* waiting can be done in any order.
*/
for (uint32_t i=0; i<numBlocks; ++i) {
(void)pthread_join(blocks[i].thread, NULL);
}
return success;
}
/* Table of functions for per-thread event handling */
EventFuncs const eventFuncs_Threads = {
.init = eventThreadInit,
.reg = eventThreadRegister,
.loop = eventThreadLoop
};

event_sample_app/main.c

@@ -0,0 +1,711 @@
/* NvSciStream Event Loop Driven Sample App - main application
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* Application info:
* This application creates a single stream with up to 4 consumers.
* Depending on command line options, the consumers can run in the same
* process as the producer, separate processes, or some combination
* thereof.
* Each consumer can be separately set to use a FIFO or mailbox queue,
* and an optional limiter block can be requested for each one.
*
* An application can check for NvSciStream events either by polling
* or waiting for a single block at a time through a NvSciStream function,
* or by binding an NvSciEventService to the blocks, allowing it to
* wait for events on multiple NvSciStream blocks (as well as other
* components capable of signalling NvSciEvents) simultaneously.
* If an application carefully controls the order of operations, it
* may be able to wait for specific events in a single thread. But
* more generally, NvSciStream supports an event loop driven model,
* where any event may occur and the application responds appropriately.
* This can be done either with a single thread handling all blocks,
* or separate threads for each block. This application provides
* examples of both use cases.
*
* For testing and demonstration purposes, the default target setup for
* the NVIDIA SDK includes indexed NvSciIpc channels with base name
* "nvscistream_", which are connected together in even/odd pairs.
* (So nvscistream_0 is connected to nvscistream_1, nvscistream_2 is
* connected to nvscistream_3, and so on.) This sample application is
* hard-coded to use these channels when streaming between processes.
* A production application should be modified to use the channels
* defined for the production target.
*
* This application is intended to illustrate how to do full setup of a
* stream, assuming everything is working correctly. It does all
* necessary error checking to confirm setup succeeded, but does not
* attempt any recovery or do a full teardown in the event a failure
* is detected.
*
* Our approach to abstracting the event and per-block support is object
* oriented and would lend itself well to C++. But for simplicity,
* since NvSciStream itself is a C interface, we have restricted this
* sample application to C code.
*
* Unless otherwise stated, all functions in all files return 1 to
* indicate success and 0 to indicate failure.
*/
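/*
 * Example invocations (illustrative; the binary name is a placeholder):
 * single process, two consumers, the second using a mailbox queue:
 *     ./event_sample -m 2 -q 1 m
 * inter-process, producer in one process and consumer 0 in another:
 *     ./event_sample -p &
 *     ./event_sample -c 0
 */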
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nvscisync.h"
#include "nvscibuf.h"
#include "nvsciipc.h"
#include "nvscistream.h"
#include "event_loop.h"
#include "block_info.h"
/* Base name for all IPC channels */
static const char ipcBaseName[] = "nvscistream_";
/* Event handling function table */
EventFuncs const* eventFuncs = NULL;
/* Top level use-case setup function pointers */
int32_t (*createProducer)(
NvSciStreamBlock* producer,
NvSciStreamBlock pool) = createProducer_Usecase1;
int32_t (*createConsumer)(
NvSciStreamBlock* consumer,
NvSciStreamBlock queue,
uint32_t index) = createConsumer_Usecase1;
/* NvSci modules */
NvSciSyncModule sciSyncModule;
NvSciBufModule sciBufModule;
/* NvSciIpc Endpoint */
NvSciIpcEndpoint ipcEndpoint = 0U;
/* Common options for all blocks */
CommonOptions opts;
/* Options for producer */
typedef struct {
uint32_t resident;
uint32_t numConsumer;
uint32_t numPacket;
} ProducerOptions;
/* Options for each consumer */
typedef struct {
uint32_t resident;
uint32_t useMailbox;
uint32_t useLimiter;
uint32_t c2cMode;
uint32_t c2cSrcUseMailbox;
uint32_t c2cDstNumPacket;
char srcChannel[32];
char dstChannel[32];
} ConsumerOptions;
/* Print command line options */
static void print_usage(const char *str)
{
printf("%s [options]\n", str);
printf(" For single- or inter-process/chip operation:\n");
printf(" -m <count> [default 1, max %d]\n", MAX_CONSUMERS);
printf(" number of multicast consumers\n");
printf(" (ignored if process doesn't own producer\n");
printf(" -f <count> [default 3]\n");
printf(" number of packets in main pool\n");
printf(" (ignored if process doesn't own producer\n");
printf(" -l <index> <limit> [default - not used]\n");
printf(" use limiter block for indexed consumer\n");
printf(" (ignored if process doesn't own producer\n");
printf(" -q <index> {f|m} [default f]\n");
printf(" use fifo (f) or maibox (m) for indexed consumer\n");
printf(" (ignored if process doesn't own the indexed consumer\n");
printf(" -e {s|t} [default s]\n");
printf(" s : events are handled through a single service\n");
printf(" t : events are handled with separate per-block threads\n");
printf(" -s {y|r} [default r]\n");
printf(" y : NvSciColor_Y8U8Y8V8 Image Color Format in use case 2\n");
printf(" r : NvSciColor_A8R8G8B8 Image Color Format in use case 2\n");
printf(" -u <index> [default 1]\n");
printf(" use case (must be same for all processes)\n");
printf(" 1 : CUDA (rt) producer to CUDA (rt) consumer\n");
#if (NV_SUPPORT_NVMEDIA == 1)
printf(" 2 : NvMedia producer to CUDA (rt) consumer\n");
#endif
printf(" -i [default - not used]\n");
printf(" set endpoint info and query info from other endpoints\n");
printf(" For inter-process operation:\n");
printf(" -p\n");
printf(" producer resides in this process\n");
printf(" -c <index> \n");
printf(" indexed consumer resides in this process\n");
printf(" For inter-chip (C2C) operation:\n");
printf(" -P <index> <Ipc endpoint name>\n");
printf(" producer resides in this process\n");
printf(" Ipc endpoint used by the producer to communicate with the "
"indexed chip-to-chip (C2C) consumer\n");
printf(" -C <index> <Ipc endpoint name>\n");
printf(" indexed consumer resides in this process\n");
printf(" Ipc endpoint used by this chip-to-chip (C2C) consumer\n");
printf(" -C and -c can't be used simultaneously.\n");
printf(" (ignored if process owns producer)\n");
printf(" -F <index> <count> [default 3]\n");
printf(" number of packets in pool attached to the IpcDst block "
"of the indexed C2C consumer\n");
printf(" set along with the indexed C2C consumer.\n");
printf(" (ignored if process doesn't own indexed C2C consumer)\n");
printf(" -Q <index> {f|m} [default f]\n");
printf(" use fifo (f) or maibox (m) for C2C IpcSrc of indexed "
"consumer.\n");
printf(" Can't specify same index as -c)\n");
printf(" set in the producer process.\n");
printf(" (ignored if process doesn't own producer)\n");
}
/* Set up chain of producer side blocks, up to optional multicast */
static int32_t setupProducerChain(
NvSciStreamBlock* producerLink,
ProducerOptions* prodOpts)
{
/* Create pool */
NvSciStreamBlock poolBlock;
if (!createPool(&poolBlock, prodOpts->numPacket, false)) {
return 0;
}
/* Create producer */
NvSciStreamBlock producerBlock;
if (!createProducer(&producerBlock, poolBlock)) {
return 0;
}
/* If multicast required, add the block. */
if (prodOpts->numConsumer > 1) {
/* Create multicast block */
NvSciStreamBlock multicastBlock;
if (!createMulticast(&multicastBlock, prodOpts->numConsumer)) {
return 0;
}
/* Connect to producer */
if (NvSciError_Success !=
NvSciStreamBlockConnect(producerBlock, multicastBlock)) {
printf("Failed to connect multicast to producer\n");
return 0;
}
/* Multicast block is end of chain */
*producerLink = multicastBlock;
} else {
/* Producer block is end of chain */
*producerLink = producerBlock;
}
return 1;
}
/* Set up chain of consumer side blocks */
static int32_t setupConsumerChain(
NvSciStreamBlock* consumerLink,
ConsumerOptions* consOpts,
uint32_t index)
{
/*
* Note: Currently the consumer "chain" just consists of the consumer
* itself and its associated queue. We follow this paradigm to
* allow easy addition of new optional blocks in the future.
*/
/* Create queue */
NvSciStreamBlock queueBlock;
if (!createQueue(&queueBlock, consOpts->useMailbox)) {
return 0;
}
/* Create consumer */
NvSciStreamBlock consumerBlock;
if (!createConsumer(&consumerBlock, queueBlock, index)) {
return 0;
}
/* Consumer block is start of chain */
*consumerLink = consumerBlock;
return 1;
}
/* Add additional branch options */
static int32_t setupBranchOptions(
NvSciStreamBlock* consumerLink,
ConsumerOptions* consOpts)
{
/* If limiter requested, add it */
if (consOpts->useLimiter) {
/* If a consumer may generate unreliable fences, a ReturnSync block can
 * be added downstream of the Limiter block for that consumer,
* to isolate any packets with bad fences.
*/
NvSciStreamBlock returnSyncBlock;
if (!createReturnSync(&returnSyncBlock)) {
return 0;
}
/* Connect to incoming consumer chain */
if (NvSciError_Success !=
NvSciStreamBlockConnect(returnSyncBlock, *consumerLink)) {
printf("Failed to connect returnSyncBlock to consumer chain\n");
return 0;
}
/* ReturnSync is new end of chain */
*consumerLink = returnSyncBlock;
/* Create limiter */
NvSciStreamBlock limiterBlock;
if (!createLimiter(&limiterBlock, consOpts->useLimiter)) {
return 0;
}
/* Connect to incoming consumer chain */
if (NvSciError_Success !=
NvSciStreamBlockConnect(limiterBlock, *consumerLink)) {
printf("Failed to connect limiter to consumer chain\n");
return 0;
}
/* Limiter is new end of chain */
*consumerLink = limiterBlock;
}
return 1;
}
/* Set up IPC from producer to consumer */
static int32_t setupProdToConsIPC(
NvSciStreamBlock* consumerLink,
ConsumerOptions* consOpts)
{
if (!consOpts->c2cMode) {
/* Create IPC block */
if (!createIpcSrc(consumerLink, consOpts->srcChannel)) {
return 0;
}
} else {
/* Create a queue for C2C src block */
NvSciStreamBlock queueBlock;
if (!createQueue(&queueBlock, consOpts->c2cSrcUseMailbox)) {
return 0;
}
/* Create C2C block */
if (!createC2cSrc(consumerLink, consOpts->srcChannel, queueBlock)) {
return 0;
}
/* If mailbox is used with C2CSrc, then create presentSync block */
if (1U == consOpts->c2cSrcUseMailbox) {
NvSciStreamBlock presentSyncBlock;
if (!createPresentSync(&presentSyncBlock)) {
return 0;
}
if (NvSciError_Success !=
NvSciStreamBlockConnect(presentSyncBlock, *consumerLink)) {
printf("Failed to connect PresentSync to consumer chain\n");
return 0;
}
/* PresentSync is new end of chain */
*consumerLink = presentSyncBlock;
}
}
return 1;
}
/* Set up IPC from consumer to producer */
static int32_t setupConsToProdIPC(
NvSciStreamBlock* producerLink,
ConsumerOptions* consOpts)
{
if (!consOpts->c2cMode) {
/* Create IPC block */
if (!createIpcDst(producerLink, consOpts->dstChannel)) {
return 0;
}
} else {
/* Create a pool for C2C dst block */
NvSciStreamBlock poolBlock;
if (!createPool(&poolBlock, consOpts->c2cDstNumPacket, true)) {
return 0;
}
/* Create C2C block */
if (!createC2cDst(producerLink, consOpts->dstChannel, poolBlock)) {
return 0;
}
}
return 1;
}
/*
* Main application function.
* As per standards, return of 0 indicates success and anything
* else is failure.
*/
int main(int argc, char *argv[])
{
uint32_t i;
int ret = 0;
/* Initialize parameters */
uint32_t badParam = 0U;
uint32_t multiProcess = 0U;
uint32_t multiSOC = 0U;
uint32_t eventOption = 0U;
ProducerOptions prodOpts = {.resident=0U, .numConsumer=1U, .numPacket=3U};
ConsumerOptions consOpts[MAX_CONSUMERS];
memset(consOpts, 0, sizeof(consOpts));
memset(&opts, 0, sizeof(CommonOptions));
/* Parse command line */
int32_t opt;
while ((opt = getopt(argc, argv, "m:f:l:q:e:s:u:ipc:P:C:F:Q:")) != EOF) {
switch (opt) {
case 'm': /* set number of consumers */
prodOpts.numConsumer = atoi(optarg);
if ((prodOpts.numConsumer < 1U) ||
(prodOpts.numConsumer > MAX_CONSUMERS)) {
badParam = 1U;
}
break;
case 'f': /* set number of packets */
prodOpts.numPacket = atoi(optarg);
if ((prodOpts.numPacket < 1U) ||
(prodOpts.numPacket > MAX_PACKETS)) {
badParam = 1U;
}
break;
case 'l': /* use limiter block for indexed consumer */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
consOpts[i].useLimiter = atoi(argv[optind++]);
}
break;
case 'q': /* use specified queue for indexed consumer */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
char t = argv[optind++][0];
if (t == 'm') {
consOpts[i].useMailbox = 1U;
} else if (t == 'f') {
consOpts[i].useMailbox = 0U;
} else {
badParam = 1U;
}
}
break;
case 'e': /* set event handling mechanism */
if (optarg[0] == 's') {
eventOption = 0U;
} else if (optarg[0] == 't') {
eventOption = 1U;
} else {
badParam = 1U;
}
break;
case 's': /* set Image Color Format type */
if (optarg[0] == 'r') {
opts.yuv = false;
} else if (optarg[0] == 'y') {
opts.yuv = true;
} else {
badParam = 1U;
}
break;
case 'u': /* set use case */
i = atoi(optarg);
if (i == 1) {
createProducer = createProducer_Usecase1;
createConsumer = createConsumer_Usecase1;
}
#if (NV_SUPPORT_NVMEDIA == 1)
else if (i == 2) {
createProducer = createProducer_Usecase2;
createConsumer = createConsumer_Usecase2;
}
#endif
else {
badParam = 1U;
}
break;
case 'i':
opts.endInfo = true;
break;
/* For inter-process operation */
case 'p': /* set producer resident */
prodOpts.resident = 1U;
multiProcess = 1U;
break;
case 'c': /* set consumer resident */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
consOpts[i].resident = 1U;
multiProcess = 1U;
}
break;
/* For inter-chip (C2C) operation */
case 'P': /* set ipc endpoint for C2C */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
/* Ipc channel used to communicate with this C2C consumer */
strcpy(consOpts[i].srcChannel, argv[optind++]);
consOpts[i].c2cMode = 1U;
prodOpts.resident = 1U;
multiProcess = 1U;
multiSOC = 1U;
}
break;
case 'C': /* set C2C mode */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
/* Ipc channel name used by this C2C consumer */
strcpy(consOpts[i].dstChannel, argv[optind++]);
consOpts[i].c2cMode = 1U;
multiProcess = 1U;
multiSOC = 1U;
if (consOpts[i].c2cDstNumPacket == 0U) {
/* default packet size 3 if not set already */
consOpts[i].c2cDstNumPacket = 3U;
}
}
break;
case 'F': /* set number of packets for C2C Dst of indexed consumer */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
consOpts[i].c2cDstNumPacket = atoi(argv[optind++]);
if ((consOpts[i].c2cDstNumPacket < 1U) ||
(consOpts[i].c2cDstNumPacket > MAX_PACKETS)) {
badParam = 1U;
}
}
break;
case 'Q': /* use specified queue for C2C Src of indexed consumer */
i = atoi(optarg);
if (i >= MAX_CONSUMERS) {
badParam = 1U;
} else {
char t = argv[optind++][0];
if (t == 'm') {
consOpts[i].c2cSrcUseMailbox = 1U;
} else if (t == 'f') {
consOpts[i].c2cSrcUseMailbox = 0U;
} else {
badParam = 1U;
}
}
break;
default:
badParam = 1U;
break;
}
}
/* Handle parsing failure */
if (badParam) {
print_usage(argv[0]);
return 1;
}
/* Check validity of the combination C2C & non-C2C consumers */
for (i=0U; i<MAX_CONSUMERS; ++i) {
if (prodOpts.resident) {
/* C2C consumer cannot be in the same process as producer */
if (consOpts[i].resident && consOpts[i].c2cMode) {
return 1;
}
} else {
/* If there is a C2C consumer in this process,
 * there can't be any non-C2C ones
 */
if (multiSOC && consOpts[i].resident) {
return 1;
}
/* Now make consumer resident if C2C */
if (consOpts[i].c2cMode) {
consOpts[i].resident = 1U;
}
}
}
/* Fill in other options based on those specified */
if (!multiProcess) {
/* If no multi-process option specified, everything is resident */
prodOpts.resident = 1U;
for (i=0U; i<prodOpts.numConsumer; ++i) {
consOpts[i].resident = 1U;
}
} else {
/* If not in the producer process, just loop over the full list */
if (!prodOpts.resident) {
prodOpts.numConsumer = MAX_CONSUMERS;
}
/* Channel names are derived from base and index */
for (i=0U; i<prodOpts.numConsumer; ++i) {
if (!consOpts[i].c2cMode) {
sprintf(consOpts[i].srcChannel, "%s%d", ipcBaseName, 2*i+0);
sprintf(consOpts[i].dstChannel, "%s%d", ipcBaseName, 2*i+1);
}
}
}
/* Select and initialize event-handling based on chosen method */
eventFuncs = eventOption ? &eventFuncs_Threads : &eventFuncs_Service;
if (!eventFuncs->init()) {
return 1;
}
/*
* Initialize NvSci libraries
*/
if (NvSciError_Success != NvSciSyncModuleOpen(&sciSyncModule)) {
    printf("Unable to open NvSciSync module\n");
    return 1;
}
if (NvSciError_Success != NvSciBufModuleOpen(&sciBufModule)) {
    printf("Unable to open NvSciBuf module\n");
    return 1;
}
if (NvSciError_Success != NvSciIpcInit()) {
    printf("Unable to initialize NvSciIpc\n");
    return 1;
}
/*
* If producer is resident, create producer block chain and attach
* all consumers.
*/
if (prodOpts.resident) {
/* Set up producer chain (up through any multicast block) */
NvSciStreamBlock producerLink;
if (!setupProducerChain(&producerLink, &prodOpts)) {
return 1;
}
/*
* For each consumer, either set up the consumer chain or create
* the IPC block to communicate with it, depending on whether the
* consumer is resident.
*/
for (i=0U; i<prodOpts.numConsumer; ++i) {
/* Create consumer or IPC to consumer */
NvSciStreamBlock consumerLink;
if (consOpts[i].resident) {
if (!setupConsumerChain(&consumerLink, &consOpts[i], i)) {
return 1;
}
} else {
if (!setupProdToConsIPC(&consumerLink, &consOpts[i])) {
return 1;
}
}
/* Add any other options (e.g. limiter) for this branch */
if (!setupBranchOptions(&consumerLink, &consOpts[i])) {
return 1;
}
/* Attach to producer chain */
if (NvSciError_Success !=
NvSciStreamBlockConnect(producerLink, consumerLink)) {
printf("Failed to connect consumer %d to producer\n", i);
return 1;
}
}
}
/*
* Otherwise, create any consumer chains resident in this process,
* and connect with IPC back to the producer process.
*/
else {
for (i=0U; i<prodOpts.numConsumer; ++i) {
if (consOpts[i].resident) {
/* Create consumer */
NvSciStreamBlock consumerLink;
if (!setupConsumerChain(&consumerLink, &consOpts[i], i)) {
return 1;
}
/* Create IPC block */
NvSciStreamBlock producerLink;
if (!setupConsToProdIPC(&producerLink, &consOpts[i])) {
return 1;
}
/* Connect blocks */
if (NvSciError_Success !=
NvSciStreamBlockConnect(producerLink, consumerLink)) {
printf("Failed to connect consumer %d to producer\n", i);
return 1;
}
}
}
}
/* Enter event loop(s) until all blocks are done */
if (!eventFuncs->loop()) {
ret = 1;
}
if (sciBufModule != NULL) {
NvSciBufModuleClose(sciBufModule);
sciBufModule = NULL;
}
if (sciSyncModule != NULL) {
NvSciSyncModuleClose(sciSyncModule);
sciSyncModule = NULL;
}
/* Close the NvSciIpc endpoint */
if (ipcEndpoint) {
if (NvSciError_Success !=
NvSciIpcCloseEndpointSafe(ipcEndpoint, false)) {
printf("Failed to close ipc endpoint\n");
}
ipcEndpoint = 0U;
}
NvSciIpcDeinit();
return ret;
}


@@ -0,0 +1,53 @@
/* NvSciStream Event Loop Driven Sample App - usecase #1
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* This use case consists of CUDA producer and CUDA consumer(s).
* It makes use of the CUDA runtime toolkit.
*
* There are two packet elements, a large data buffer and a small
* buffer containing a CRC checksum value.
*
* The producer operation is very simple. It takes a local source buffer
* filled with simple data, and issues a CUDA command to asynchronously
* copy it to the packet's data buffer. It generates a checksum from the
* source buffer and puts that in the packet's CRC buffer.
*
 * Each consumer similarly issues a CUDA command to copy the packet's
 * data buffer to a local buffer. When finished, it generates a checksum
 * from the local copy and compares it to the value in the packet's
* CRC buffer.
*
* The data buffer is processed through the CUDA engine, with commands issued
* asynchronously. Sync objects must be used to coordinate when it is safe
* to write and read the buffer. The CRC buffer is written and read directly
 * through the CPU in immediate mode, so it is not necessary to wait
* for the sync objects before accessing it.
*
* In addition to the normal case where producers signal sync objects that
* consumers wait for, and vice versa, this use case also needs the
* producer to be able to wait for the fences it generates, in order
* to protect its local buffer from modification while still in use.
* So this use case also provides an example of CPU waiting for fences.
*/
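/*
 * A sketch of the CPU fence wait mentioned above, assuming a wait context
 * has already been allocated with NvSciSyncCpuWaitContextAlloc() (see the
 * producer code for the full sequence):
 *
 *     NvSciError err = NvSciSyncFenceWait(&fence, cpuWaitContext, -1);
 *
 * A timeout of -1 blocks until the fence is signaled.
 */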
#ifndef _USECASE1_H
#define _USECASE1_H 1
/* Names for the packet elements */
#define ELEMENT_NAME_DATA 0xdada
#define ELEMENT_NAME_CRC 0xcc
/* Names for the endpoint info */
#define ENDINFO_NAME_PROC 0xabcd
#endif // _USECASE1_H

event_sample_app/util.c

@@ -0,0 +1,62 @@
/* NvSciStream Event Loop Driven Sample App - utility functions
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "util.h"
#define CRC32_POLYNOMIAL 0xEDB88320L
static uint32_t calculateBufferCRC(
uint32_t count,
uint32_t crc,
uint8_t* buffer)
{
static uint32_t crcTable[256];
static int initialized = 0;
uint32_t i, j, tmp;
if (!initialized) {
for (i = 0; i <= 255; i++) {
tmp = i;
for (j = 8; j > 0; j--) {
if (tmp & 1) {
tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL;
} else {
tmp >>= 1;
}
}
crcTable[i] = tmp;
}
initialized = 1;
}
while (count-- != 0) {
tmp = (crc >> 8) & 0x00FFFFFFL;
crc = tmp ^ crcTable[((uint32_t) crc ^ *buffer++) & 0xFF];
}
return crc;
}
uint32_t generateCRC(
uint8_t *data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch)
{
uint32_t y = 0U;
uint32_t crc = 0U;
for (y = 0U; y < height; y++) {
crc = calculateBufferCRC(width, crc, data_ptr);
data_ptr += pitch;
}
return crc;
}
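/*
 * Example use, assuming a tightly packed 2D image where pitch equals
 * width: the whole buffer of height rows is checksummed with
 *     uint32_t crc = generateCRC(buffer, height, width, width);
 * A single flat buffer of n bytes is simply generateCRC(buffer, 1, n, n).
 */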

event_sample_app/util.h

@@ -0,0 +1,25 @@
/* NvSciStream Event Loop Driven Sample App - utilities
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef _UTIL_H
#define _UTIL_H 1
#include <stdint.h>
/* CRC checksum generator */
extern uint32_t generateCRC(
uint8_t *data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch);
#endif // _UTIL_H

push_info.txt

@@ -0,0 +1 @@
jetson_36.4.3

rawstream/Makefile

@@ -0,0 +1,140 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA Corporation and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA Corporation is strictly prohibited.
#
# Location of common libraries
LIB_DIR = /usr/lib/aarch64-linux-gnu
# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
# Location of NVSCI header
NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
GCC ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
# internal flags
NVCCFLAGS :=
CCFLAGS :=
LDFLAGS :=
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
EXTRA_CCFLAGS ?=
override abi := aarch64
LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
# Debug build flags
dbg = 0
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
################################################################################
# Common includes and paths
INCLUDES := -I./
LIBRARIES := -L$(LIB_DIR)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda -lrt
endif
# Includes and paths for NVSCI libraries
NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null)
NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null)
NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null)
ifeq ("$(NVSCIBUFHEADER)","")
$(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCISYNCHEADER)","")
$(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR))
endif
ifeq ("$(NVSCIIPCHEADER)","")
$(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR))
endif
INCLUDES += -I$(NVSCI_HEADER_DIR)
LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream
ALL_CCFLAGS += --std=c++11 --threads 0
# CUDA code generation flags
# Gencode arguments
SMS ?= 53 61 70 72 75 80 86 87
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
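# For example, with SMS = 87 the rules above expand to:
#   GENCODE_FLAGS = -gencode arch=compute_87,code=sm_87 \
#                   -gencode arch=compute_87,code=compute_87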
################################################################################
# Target rules
OUTPUT := rawstream
all: build
build: $(OUTPUT)
OBJ := rawstream_consumer.o
OBJ += rawstream_cuda.o
OBJ += rawstream_ipc_linux.o
OBJ += rawstream_main.o
OBJ += rawstream_producer.o
%.o: %.c
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
$(OUTPUT): $(OBJ)
$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
run: build
	./$(OUTPUT)
testrun: build
clean:
rm -f $(OBJ) $(OUTPUT)
clobber: clean

rawstream/README.txt

@@ -0,0 +1,36 @@
Rawstream Sample App - README
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
NVIDIA Corporation and its licensors retain all intellectual property and
proprietary rights in and to this software, related documentation and any
modifications thereto. Any use, reproduction, disclosure or distribution
of this software and related documentation without an express license
agreement from NVIDIA Corporation is strictly prohibited.
---
# rawstream - NvStreams Rawstream Sample App
## Description
This directory contains a raw stream sample application using NvSciBuf,
NvSciSync and NvSciIpc.
## Build the application
The rawstream sample includes source code and a Makefile.
1. On the host system, navigate to the sample application directory:
$ cd <top>/samples/nvsci/rawstream/
2. Build the sample application:
$ make clean
$ make
## Examples of how to run the sample application:
$ sudo ./rawstream -p &
$ sudo ./rawstream -c

rawstream/rawstream.h

@@ -0,0 +1,148 @@
/*
* Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#ifndef _rawstream_h
#define _rawstream_h
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <nvscisync.h>
#include <nvscibuf.h>
#include "cuda.h"
#include "cuda_runtime_api.h"
// Constants controlling configuration
#define totalFrames 32U
#define totalBuffers 4U
// Sync/Buf modules
extern NvSciSyncModule syncModule;
extern NvSciBufModule bufModule;
// Exchanged sync info
extern NvSciSyncAttrList producerSignalAttrs;
extern NvSciSyncAttrList consumerSignalAttrs;
extern NvSciSyncAttrList producerWaitAttrs;
extern NvSciSyncAttrList consumerWaitAttrs;
extern NvSciSyncAttrList prodToConsAttrs;
extern NvSciSyncAttrList consToProdAttrs;
extern NvSciSyncObj consumerSignalObj;
extern NvSciSyncObj producerSignalObj;
extern NvSciSyncObj consumerWaitObj;
extern NvSciSyncObj producerWaitObj;
// Exchanged buf info
extern NvSciBufAttrList producerWriteAttrs;
extern NvSciBufAttrList consumerReadAttrs;
extern NvSciBufAttrList combinedBufAttrs;
// CUDA info common to producer and consumer
typedef struct {
int deviceId;
CUuuid uuid;
cudaStream_t stream;
cudaExternalSemaphore_t signalerSem;
cudaExternalSemaphore_t waiterSem;
NvSciBufType bufType;
uint64_t bufSize;
uint8_t* bufCopy;
} CudaClientInfo;
// List of buffers with status
typedef struct {
// Buffer handle
NvSciBufObj obj;
// CUDA external memory object
cudaExternalMemory_t extMem;
// Mapping into virtual memory
uint8_t* ptr;
// Current owner (0 = producer, 1 = consumer)
uint32_t owner;
// Fence to wait for
NvSciSyncFence fence;
// Checksum for error checking
uint32_t crc;
} Buffer;
extern Buffer buffers[totalBuffers];
// packet data
// Note: The checksum is not, in general, needed in a real streaming
// application. All that is required is something to identify
// the buffer and provide the fences. See comments in the producer
// and consumer for the reason for the checksum.
typedef struct {
// buffer identifier
uint32_t bufferId;
// buffer checksum
uint32_t crc;
// Fence to wait for
NvSciSyncFenceIpcExportDescriptor fenceDesc;
} Packet;
// IPC related info
typedef struct {
// NvSciIpc handle
NvSciIpcEndpoint endpoint;
// IPC channel info
struct NvSciIpcEndpointInfo info;
// QNX: Channel id to get event
int32_t chId;
// QNX: Connection id to send event in library
int32_t connId;
// Linux: IPC event fd
int32_t ipcEventFd;
} IpcWrapper;
extern IpcWrapper ipcWrapper;
// CUDA data types
typedef struct cudaExternalSemaphoreHandleDesc cudaExternalSemaphoreHandleDesc;
typedef struct cudaExternalMemoryHandleDesc cudaExternalMemoryHandleDesc;
typedef struct cudaExternalMemoryBufferDesc cudaExternalMemoryBufferDesc;
typedef struct cudaExternalSemaphoreWaitParams cudaExternalSemaphoreWaitParams;
typedef struct cudaExternalSemaphoreSignalParams cudaExternalSemaphoreSignalParams;
// Utility functions
extern uint32_t GenerateCRC(uint8_t* data, uint32_t height, uint32_t width, uint32_t pitch);
// Thread functions
extern void* producerFunc(void*);
extern void* consumerFunc(void*);
// IPC functions
extern NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper);
extern NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size);
extern NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size);
extern void ipcDeinit(IpcWrapper* ipcWrapper);
// CUDA-specific operations
extern bool setupCuda(CudaClientInfo* info);
extern bool setupCudaSync(CudaClientInfo* info,
NvSciSyncObj sciSignalObj,
NvSciSyncObj sciWaitObj);
extern bool setupCudaBufAttr(CudaClientInfo* info,
NvSciBufAttrList attrs);
extern bool setupCudaBuffer(CudaClientInfo* info,
Buffer* buf);
extern bool waitCudaFence(CudaClientInfo* info,
Buffer* buf);
extern bool signalCudaFence(CudaClientInfo* info,
Buffer* buf);
extern void deinitCuda(CudaClientInfo* info);
extern void deinitCudaBuffer(Buffer* buf, int num);
#endif // _rawstream_h


@@ -0,0 +1,591 @@
/*
* Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
void* consumerFunc(void* arg)
{
CudaClientInfo cudaInfo;
NvSciError sciErr;
int cudaErr;
// Declare the exported descriptors and the reconciliation output up
// front, initialized to NULL, so the cleanup code at "done" can test
// them safely even when an early failure jumps there.
void* sendWaitListDesc = NULL;
void* recvWaitListDesc = NULL;
void* sendObjAndListDesc = NULL;
void* recvObjAndListDesc = NULL;
void* consumerReadAttrsDesc = NULL;
void* recvBufListDesc = NULL;
NvSciSyncAttrList syncConflictAttrs = NULL;
*(int*)arg = 1;
fprintf(stderr, "Consumer starting\n");
// Do common cuda initialization
if (!setupCuda(&cudaInfo)) {
goto done;
}
// Create an empty sync attribute list for signaling permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &consumerSignalAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer signal attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to signal syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerSignalAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrSignal);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query signal attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Consumer signal attributes established\n");
// Create an empty sync attribute list for waiting permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &consumerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to wait for syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerWaitAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrWait);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query wait attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Consumer wait attributes established\n");
// Export consumer's wait attributes to a form suitable for IPC
size_t sendWaitAttrListSize = 0U;
sciErr = NvSciSyncAttrListIpcExportUnreconciled(&consumerWaitAttrs,
1,
ipcWrapper.endpoint,
&sendWaitListDesc,
&sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Send the size of the consumer's wait attributes to the producer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendWaitAttrListSize,
sizeof(sendWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send consumer wait attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the consumer's wait attributes
sciErr = ipcSend(&ipcWrapper,
sendWaitListDesc,
sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send consumer wait attrs (%x)\n", sciErr);
goto done;
}
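// Note: every variable-length exchange in this app follows the same
// two-step pattern over the raw IPC channel: first send the size of the
// blob, then the blob itself, e.g.
//     ipcSend(&ipcWrapper, &size, sizeof(size));
//     ipcSend(&ipcWrapper, desc, size);
// with a matching pair of ipcRecvFill() calls on the receiving side.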
// Wait to receive the size of the producer's wait attributes
size_t recvWaitAttrListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvWaitAttrListSize,
sizeof(recvWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv producer wait attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the producer's wait attributes
recvWaitListDesc = malloc(recvWaitAttrListSize);
if (recvWaitListDesc == NULL) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync attr allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive producer's wait attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvWaitListDesc,
recvWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Convert the received producer wait attributes to an attribute list
sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule,
ipcWrapper.endpoint,
recvWaitListDesc,
recvWaitAttrListSize,
&producerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for consumer to producer signaling
NvSciSyncAttrList syncAllAttrs[2];
syncAllAttrs[0] = consumerSignalAttrs;
syncAllAttrs[1] = producerWaitAttrs;
sciErr = NvSciSyncAttrListReconcile(syncAllAttrs, 2,
&consToProdAttrs, &syncConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't merge consumer->producer attrs (%x)\n",
sciErr);
goto done;
}
// Allocate consumer to producer sync object
sciErr = NvSciSyncObjAlloc(consToProdAttrs, &consumerSignalObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't allocate consumer->producer sync (%x)\n",
sciErr);
goto done;
}
// Export sync attributes and object to a form suitable for IPC
size_t sendObjAndListSize = 0U;
sciErr = NvSciSyncIpcExportAttrListAndObj(consumerSignalObj,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&sendObjAndListDesc,
&sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Send the size of the sync description to the producer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send consumer->producer sync description size(%x)\n",
sciErr);
goto done;
}
// Send the sync description to the producer
sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the producer->consumer sync description
size_t recvObjAndListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvObjAndListSize,
sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't recv producer->consumer sync description size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the description
recvObjAndListDesc = malloc(recvObjAndListSize);
if (NULL == recvObjAndListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Sync description allocation failed (%x)\n", sciErr);
goto done;
}
// Wait to receive producer->consumer sync description
sciErr = ipcRecvFill(&ipcWrapper,
recvObjAndListDesc,
recvObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't receive producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Convert the received producer->consumer sync description to a
// sync attribute list and object
sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
ipcWrapper.endpoint,
recvObjAndListDesc,
recvObjAndListSize,
&consumerWaitAttrs,
1,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&consumerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't import producer->consumer sync (%x)\n",
sciErr);
goto done;
}
// Set up CUDA sync objects, importing NvSciSync objects
if (!setupCudaSync(&cudaInfo, consumerSignalObj, consumerWaitObj)) {
goto done;
}
fprintf(stderr, "Consumer exchanged sync objects with producer\n");
// Create an empty buffer attribute list for consumer buffers
sciErr = NvSciBufAttrListCreate(bufModule, &consumerReadAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Fill consumer buffer attribute list with values
NvSciBufAttrKeyValuePair bufKeyValue[4];
NvSciRmGpuId gpuId;
memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
bufKeyValue[0].value = &gpuId;
bufKeyValue[0].len = sizeof(gpuId);
NvSciBufType bufType = NvSciBufType_RawBuffer;
bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
bufKeyValue[1].value = &bufType;
bufKeyValue[1].len = sizeof(bufType);
NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_Readonly;
bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
bufKeyValue[2].value = &bufPerm;
bufKeyValue[2].len = sizeof(bufPerm);
bool bufAccessFlag = true;
bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
bufKeyValue[3].value = &bufAccessFlag;
bufKeyValue[3].len = sizeof(bufAccessFlag);
sciErr = NvSciBufAttrListSetAttrs(consumerReadAttrs, bufKeyValue, 4);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to fill consumer buffer attrs (%x)\n", sciErr);
goto done;
}
fprintf(stderr, "Consumer buffer attributes established\n");
// Export consumer buffer attributes in a form suitable for IPC
size_t consumerReadAttrsSize = 0U;
sciErr = NvSciBufAttrListIpcExportUnreconciled(&consumerReadAttrs,
1,
ipcWrapper.endpoint,
&consumerReadAttrsDesc,
&consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Send size of consumer buffer attributes
sciErr = ipcSend(&ipcWrapper,
&consumerReadAttrsSize,
sizeof(consumerReadAttrsSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send consumer buffer attrs size (%x)\n",
sciErr);
goto done;
}
// Send consumer buffer attributes
sciErr = ipcSend(&ipcWrapper,
consumerReadAttrsDesc,
consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send consumer buffer attrs (%x)\n", sciErr);
goto done;
}
// Wait to receive the size of the combined buffer attributes
size_t recvBufListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvBufListSize,
sizeof(recvBufListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to receive combinedbuffer attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the combined buffer attributes
recvBufListDesc = malloc(recvBufListSize);
if (NULL == recvBufListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Buffer attr allocation failed(%x)\n", sciErr);
goto done;
}
// Receive the combined buffer attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvBufListDesc,
recvBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv combined buffer attr desc (%x)\n",
sciErr);
goto done;
}
// Convert the combined buffer attributes to an attribute list
sciErr = NvSciBufAttrListIpcImportReconciled(bufModule,
ipcWrapper.endpoint,
recvBufListDesc,
recvBufListSize,
NULL,
0,
&combinedBufAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import combined buffer attr (%x)\n",
sciErr);
goto done;
}
// Extract attributes needed by CUDA
if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
goto done;
}
// Receive all buffers
for (uint32_t i=0U; i<totalBuffers; ++i) {
Buffer* buf = &buffers[i];
// Receive the next buffer description
NvSciBufObjIpcExportDescriptor objDesc;
sciErr = ipcRecvFill(&ipcWrapper,
&objDesc,
sizeof(NvSciBufObjIpcExportDescriptor));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv buffer %d from producer (%x)\n",
i, sciErr);
goto done;
}
// Convert buffer description to a buffer object
sciErr = NvSciBufObjIpcImport(ipcWrapper.endpoint,
&objDesc,
combinedBufAttrs,
NvSciBufAccessPerm_Readonly,
1000U,
&buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import buffer %d from producer (%x)\n",
i, sciErr);
goto done;
}
// Import the buffer into CUDA
if (!setupCudaBuffer(&cudaInfo, buf)) {
goto done;
}
}
fprintf(stderr, "Consumer buffers received and established\n");
// Receive all frames
uint32_t currFrame = 0;
uint32_t currBuffer = 0;
Packet packet;
while (currFrame < totalFrames) {
fprintf(stderr, "Consumer starting frame %d in buffer %d\n",
currFrame, currBuffer);
Buffer* buf = &buffers[currBuffer];
// Wait for buffer to be available
while (buf->owner != 1U) {
// Wait for next presented buffer
sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to recv buffer from producer (%x)\n",
sciErr);
goto done;
}
// Import transmitted fence description to a fence
sciErr = NvSciSyncIpcImportFence(consumerWaitObj,
&packet.fenceDesc,
&buffers[packet.bufferId].fence);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to import fence from producer (%x)\n",
sciErr);
goto done;
}
// copy CRC data from packet
buffers[packet.bufferId].crc = packet.crc;
// Mark consumer as owner of this buffer
buffers[packet.bufferId].owner = 1U;
}
// Wait for fence generated by producer before reading
if (!waitCudaFence(&cudaInfo, buf)) {
goto done;
}
// Read the buffer to the local copy
cudaErr = cudaMemcpy2DAsync(cudaInfo.bufCopy,
cudaInfo.bufSize,
buf->ptr,
cudaInfo.bufSize,
cudaInfo.bufSize,
1,
cudaMemcpyDeviceToHost,
cudaInfo.stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr);
goto done;
}
// Wait for operation to finish, then compute and compare checksum
// IMPORTANT NOTE:
// A normal stream application would not perform these steps.
// A checksum is not required for streaming, and waiting for
// operations to finish (which we only need because the
// checksum is calculated by the CPU) introduces bubbles
// in the hardware pipeline. A real application can rely on
// the generated NvSciSync fences for synchronization.
// These steps are only taken in this sample application
// because the consumer has no output visible to the user,
// so the checksum allows us to verify that the application
// is behaving properly.
cudaDeviceSynchronize();
uint32_t crc = GenerateCRC(cudaInfo.bufCopy,
1,
cudaInfo.bufSize,
cudaInfo.bufSize);
if (buf->crc != crc) {
fprintf(stderr, "Checksums don't match (%x vs %x)\n",
crc, buf->crc);
goto done;
}
fprintf(stderr, "Consumer read frame %d in buffer %d\n",
currFrame, currBuffer);
// Generate new fence indicating when reading has finished
if (!signalCudaFence(&cudaInfo, buf)) {
goto done;
}
// Mark buffer as owned by producer now
buf->owner = 0U;
// Export buffer index and fence for transmission over IPC
// There is no checksum for the return trip.
packet.bufferId = currBuffer;
packet.crc = 0U;
sciErr = NvSciSyncIpcExportFence(&buf->fence,
ipcWrapper.endpoint,
&packet.fenceDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to export consumer fence (%x)\n", sciErr);
goto done;
}
// Send buffer index and fence to producer
sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to send buffer to producer (%x)\n",
sciErr);
goto done;
}
fprintf(stderr, "Consumer finished frame %d in buffer %d\n",
currFrame, currBuffer);
// Advance buffer and frame
currBuffer = (currBuffer + 1U) % totalBuffers;
currFrame++;
}
// Success
*(int*)arg = 0;
done:
// Free CUDA resources
deinitCudaBuffer(buffers, totalBuffers);
deinitCuda(&cudaInfo);
// Free NvSci objects
if (NULL != consumerSignalAttrs)
NvSciSyncAttrListFree(consumerSignalAttrs);
if (NULL != consumerWaitAttrs)
NvSciSyncAttrListFree(consumerWaitAttrs);
if (NULL != sendWaitListDesc)
NvSciSyncAttrListFreeDesc(sendWaitListDesc);
if (NULL != producerWaitAttrs)
NvSciSyncAttrListFree(producerWaitAttrs);
if (NULL != consToProdAttrs)
NvSciSyncAttrListFree(consToProdAttrs);
if (NULL != syncConflictAttrs)
NvSciSyncAttrListFree(syncConflictAttrs);
if (NULL != consumerSignalObj)
NvSciSyncObjFree(consumerSignalObj);
if (NULL != sendObjAndListDesc)
NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc);
if (NULL != consumerWaitObj)
NvSciSyncObjFree(consumerWaitObj);
if (NULL != consumerReadAttrs)
NvSciBufAttrListFree(consumerReadAttrs);
if (NULL != consumerReadAttrsDesc)
NvSciBufAttrListFreeDesc(consumerReadAttrsDesc);
if (NULL != combinedBufAttrs)
NvSciBufAttrListFree(combinedBufAttrs);
// Free malloc'd resources
if (NULL != recvWaitListDesc)
free(recvWaitListDesc);
if (NULL != recvObjAndListDesc)
free(recvObjAndListDesc);
if (NULL != recvBufListDesc)
free(recvBufListDesc);
fprintf(stderr, "Consumer exiting\n");
return NULL;
}

234
rawstream/rawstream_cuda.c Normal file

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
// Initialize CUDA info
bool setupCuda(CudaClientInfo* info)
{
int cudaErr;
info->deviceId = 0;
info->stream = NULL;
info->signalerSem = NULL;
info->waiterSem = NULL;
info->bufCopy = NULL;
int numOfGPUs = 0;
cudaErr = cudaGetDeviceCount(&numOfGPUs);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Failed to get compute-capable devices (%d)\n", cudaErr);
return false;
}
cudaErr = cudaSetDevice(info->deviceId);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Failed to set CUDA device (%d)\n", cudaErr);
return false;
}
cudaErr = cuDeviceGetUuid(&info->uuid, info->deviceId);
if (CUDA_SUCCESS != cudaErr) {
fprintf(stderr, "Failed to query CUDA UUID (%d)\n", cudaErr);
return false;
}
return true;
}
// Create CUDA sync objects and map to imported NvSciSync
bool setupCudaSync(CudaClientInfo* info,
NvSciSyncObj sciSignalObj,
NvSciSyncObj sciWaitObj)
{
cudaExternalSemaphoreHandleDesc extSemDesc;
int cudaErr;
// Create CUDA stream for signaling and waiting
cudaErr = cudaStreamCreateWithFlags(&info->stream,
cudaStreamNonBlocking);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to create CUDA stream (%d)\n",
cudaErr);
return false;
}
// Import signaler sync object to CUDA semaphore
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = sciSignalObj;
cudaErr = cudaImportExternalSemaphore(&info->signalerSem, &extSemDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import signal sync object to CUDA (%d)\n",
cudaErr);
return false;
}
// Import waiter sync object to CUDA semaphore
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = sciWaitObj;
cudaErr = cudaImportExternalSemaphore(&info->waiterSem, &extSemDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import wait sync object to CUDA (%d)\n",
cudaErr);
return false;
}
return true;
}
// Extract info from buffer attributes needed by CUDA
bool setupCudaBufAttr(CudaClientInfo* info,
NvSciBufAttrList attrs)
{
NvSciBufAttrKeyValuePair queryKeyValue[] = {
{ NvSciBufGeneralAttrKey_Types, NULL, 0 },
{ NvSciBufRawBufferAttrKey_Size, NULL, 0 },
};
NvSciError sciErr = NvSciBufAttrListGetAttrs(attrs, queryKeyValue, 2);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to query buffer type/size (%x)\n", sciErr);
return false;
}
// TODO: Original sample queries BufType but doesn't seem to do anything
// with it. Might not be needed.
info->bufType = *((NvSciBufType*)(queryKeyValue[0].value));
info->bufSize = *((uint64_t*)(queryKeyValue[1].value));
// Allocate storage for a copy of the buffer contents
info->bufCopy = (uint8_t*)malloc(info->bufSize);
if (NULL == info->bufCopy) {
fprintf(stderr, "Unable to allocate buffer copy\n");
return false;
}
(void)memset(info->bufCopy, 0, info->bufSize);
return true;
}
// Import NvSciBuf into CUDA
bool setupCudaBuffer(CudaClientInfo* info,
Buffer* buf)
{
int cudaErr;
// Import buffer to CUDA as external memory
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = buf->obj;
memHandleDesc.size = info->bufSize;
cudaErr = cudaImportExternalMemory(&buf->extMem, &memHandleDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Unable to import buffer to CUDA (%d)\n",
cudaErr);
return false;
}
// Map to a CUDA memory buffer
cudaExternalMemoryBufferDesc bufferDesc;
memset(&bufferDesc, 0, sizeof(bufferDesc));
bufferDesc.size = info->bufSize;
bufferDesc.offset = 0;
cudaErr = cudaExternalMemoryGetMappedBuffer((void *)&buf->ptr,
buf->extMem,
&bufferDesc);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to map CUDA buffer (%d)\n", cudaErr);
return false;
}
return true;
}
// Tell CUDA to wait for the fence associated with a buffer
bool waitCudaFence(CudaClientInfo* info,
Buffer* buf)
{
cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
waitParams.params.nvSciSync.fence = &buf->fence;
waitParams.flags = 0;
int cudaErr = cudaWaitExternalSemaphoresAsync(&info->waiterSem,
&waitParams,
1,
info->stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to wait for fence (%d)\n", cudaErr);
return false;
}
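// The wait is queued on the CUDA stream at this point, so the CPU-side
// fence reference can safely be cleared.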
NvSciSyncFenceClear(&buf->fence);
return true;
}
// Tell CUDA to generate a fence for a buffer
bool signalCudaFence(CudaClientInfo* info,
Buffer* buf)
{
cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
signalParams.params.nvSciSync.fence = &buf->fence;
signalParams.flags = 0;
int cudaErr = cudaSignalExternalSemaphoresAsync(&info->signalerSem,
&signalParams,
1,
info->stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to signal fence (%d)\n", cudaErr);
return false;
}
return true;
}
void deinitCuda(CudaClientInfo* info)
{
if (NULL != info->bufCopy) {
free(info->bufCopy);
info->bufCopy = NULL;
}
if (NULL != info->signalerSem) {
(void)cudaDestroyExternalSemaphore(info->signalerSem);
info->signalerSem = NULL;
}
if (NULL != info->waiterSem) {
(void)cudaDestroyExternalSemaphore(info->waiterSem);
info->waiterSem = NULL;
}
if (NULL != info->stream) {
(void)cudaStreamDestroy(info->stream);
info->stream = NULL;
}
}
void deinitCudaBuffer(Buffer* buf, int num)
{
int i;
for (i = 0; i < num; ++i) {
if (NULL != buf[i].ptr)
cudaFree(buf[i].ptr);
if (NULL != buf[i].extMem)
(void)cudaDestroyExternalMemory(buf[i].extMem);
if (NULL != buf[i].obj)
NvSciBufObjFree(buf[i].obj);
}
}

172
rawstream/rawstream_ipc_linux.c Normal file

@@ -0,0 +1,172 @@
/*
* Copyright (c) 2020-2022 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
// Initialize one end of named communication channel
NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper)
{
NvSciError err = NvSciError_Success;
// Open named endpoint
err = NvSciIpcOpenEndpoint(endpointName, &ipcWrapper->endpoint);
if (err != NvSciError_Success) {
fprintf(stderr, "Unable to open endpoint %s (%x)\n",
endpointName, err);
goto fail;
}
// initialize IPC event notifier
err = NvSciIpcGetLinuxEventFd(ipcWrapper->endpoint, &ipcWrapper->ipcEventFd);
if (err != NvSciError_Success) {
fprintf(stderr, "Unable to get Linux event fd (%x)\n", err);
goto fail;
}
// Retrieve endpoint info
err = NvSciIpcGetEndpointInfo(ipcWrapper->endpoint, &ipcWrapper->info);
if (NvSciError_Success != err) {
fprintf(stderr, "Unable to retrieve IPC endpoint info (%x)", err);
goto fail;
}
err = NvSciIpcResetEndpointSafe(ipcWrapper->endpoint);
if (NvSciError_Success != err) {
fprintf(stderr, "Unable to reset IPC endpoint (%x)", err);
}
fail:
return err;
}
// Clean up IPC when done
void ipcDeinit(IpcWrapper* ipcWrapper)
{
NvSciError err = NvSciIpcCloseEndpointSafe(ipcWrapper->endpoint, false);
if (NvSciError_Success != err) {
fprintf(stderr, "NvSciIpcCloseEndpointSafe failed (%x)\n", err);
}
}
// Wait for an event on IPC channel
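// Poll NvSciIpc for the requested event; if it is not pending yet, block
// in select() on the endpoint's event fd until new activity is signaled,
// then check again.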
static NvSciError waitEvent(IpcWrapper* ipcWrapper, uint32_t value)
{
fd_set rfds;
uint32_t event = 0;
NvSciError err;
while (true) {
// Get pending IPC events
err = NvSciIpcGetEventSafe(ipcWrapper->endpoint, &event);
if (NvSciError_Success != err) {
fprintf(stderr, "NvSciIpcGetEventSafe failed (%x)\n", err);
return err;
}
// Return if event is the kind we're looking for
if (0U != (event & value)) {
break;
}
FD_ZERO(&rfds);
FD_SET(ipcWrapper->ipcEventFd, &rfds);
// Wait for a signal indicating a new event
if (select(ipcWrapper->ipcEventFd + 1, &rfds, NULL, NULL, NULL) < 0) {
// select failed
return NvSciError_ResourceError;
}
if (!FD_ISSET(ipcWrapper->ipcEventFd, &rfds)) {
return NvSciError_NvSciIpcUnknown;
}
}
return NvSciError_Success;
}
// Send a message over IPC
NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size)
{
NvSciError err = NvSciError_Success;
bool done = false;
uint32_t bytes;
// Loop until entire message sent
while (done == false) {
// Wait for room in channel to send a message
err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_WRITE);
if (NvSciError_Success != err) {
goto fail;
}
assert(size <= UINT32_MAX);
// Send as much of the message as we can
err = NvSciIpcWriteSafe(ipcWrapper->endpoint, buf, (uint32_t)size,
&bytes);
if (NvSciError_Success != err) {
fprintf(stderr, "IPC write failed (%x)\n", err);
goto fail;
}
// For this simple sample, we just fail if the entire message wasn't
// sent. Could instead retry to send the rest.
if (size != (size_t)bytes) {
fprintf(stderr, "Failed to send entire message (%u < %zu)\n",
bytes, size);
err = NvSciError_NvSciIpcUnknown;
goto fail;
}
done = true;
}
fail:
return err;
}
// Receive a message over IPC
NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size)
{
NvSciError err = NvSciError_Success;
bool done = false;
uint32_t bytes;
// Loop until entire message received
while (done == false) {
// Wait for incoming data
err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_READ);
if (NvSciError_Success != err) {
goto fail;
}
assert(size <= UINT32_MAX);
// Read as much of the message as we can
err = NvSciIpcReadSafe(ipcWrapper->endpoint, buf, (uint32_t)size,
&bytes);
if (NvSciError_Success != err) {
fprintf(stderr, "IPC read failed (%x)\n", err);
goto fail;
}
// For this simple sample, we just fail if the entire message wasn't
// read. Could instead retry to receive the rest.
if (size != (size_t)bytes) {
fprintf(stderr, "Failed to read entire message (%u < %zu)\n",
bytes, size);
err = NvSciError_NvSciIpcUnknown;
goto fail;
}
done = true;
}
fail:
return err;
}

209
rawstream/rawstream_main.c Normal file

@@ -0,0 +1,209 @@
/*
* Copyright (c) 2020 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
NvSciSyncModule syncModule = NULL;
NvSciBufModule bufModule = NULL;
NvSciSyncAttrList producerSignalAttrs = NULL;
NvSciSyncAttrList consumerSignalAttrs = NULL;
NvSciSyncAttrList producerWaitAttrs = NULL;
NvSciSyncAttrList consumerWaitAttrs = NULL;
NvSciSyncAttrList prodToConsAttrs = NULL;
NvSciSyncAttrList consToProdAttrs = NULL;
NvSciSyncObj consumerSignalObj = NULL;
NvSciSyncObj producerSignalObj = NULL;
NvSciSyncObj consumerWaitObj = NULL;
NvSciSyncObj producerWaitObj = NULL;
NvSciBufAttrList producerWriteAttrs = NULL;
NvSciBufAttrList consumerReadAttrs = NULL;
NvSciBufAttrList combinedBufAttrs = NULL;
Buffer buffers[totalBuffers];
IpcWrapper ipcWrapper;
int main(int argc, char *argv[])
{
NvSciError err;
int producer;
const char* endpoint;
int ret = 0;
if ((argc == 2) && (strcmp(argv[1], "-p") == 0)){
producer = 1;
endpoint = "Producer";
} else if ((argc == 2) && (strcmp(argv[1], "-c") == 0)) {
producer = 0;
endpoint = "Consumer";
} else {
fprintf(stderr,
"Usage: ./rawstream <-p|-c>\n");
fprintf(stderr,
" -p runs the producer, -c runs the consumer\n");
return 1;
}
fprintf(stderr, "%p application starting\n", endpoint);
// Open sync module (shared by all threads)
err = NvSciSyncModuleOpen(&syncModule);
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to open sync module (%x)\n",
endpoint, err);
return 1;
}
// Open buf module (shared by all threads)
err = NvSciBufModuleOpen(&bufModule);
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to open buf module (%x)\n",
endpoint, err);
ret = 1;
goto close_sync_module;
}
// Initialize IPC library
err = NvSciIpcInit();
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to init ipc library (%x)\n",
endpoint, err);
ret = 1;
goto close_buf_module;
}
// Establish IPC communications based on endpoint
// TODO: Settle on final IPC channel names
if (producer == 1) {
err = ipcInit("nvscisync_a_0", &ipcWrapper);
} else {
err = ipcInit("nvscisync_a_1", &ipcWrapper);
}
if (NvSciError_Success != err) {
fprintf(stderr, "%s unable to initialize communication (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
// Test communication by exchanging a simple handshake message
const int send_handshake = 12345;
err = ipcSend(&ipcWrapper, &send_handshake, sizeof(send_handshake));
if (NvSciError_Success != err) {
fprintf(stderr, "%s failed to send handshake (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
int recv_handshake = 0;
ipcRecvFill(&ipcWrapper, &recv_handshake, sizeof(recv_handshake));
if (NvSciError_Success != err) {
fprintf(stderr, "%s failed to receive handshake (%x)\n",
endpoint, err);
ret = 1;
goto deinit_IPC;
}
if (send_handshake != recv_handshake) {
fprintf(stderr, "%s handshake did not match (%d vs %d)\n",
endpoint, send_handshake, recv_handshake);
ret = 1;
goto deinit_IPC;
}
// Initialize buffer list
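// The owner field tracks which side currently holds each buffer:
// 0 = producer, 1 = consumer. The producer starts out owning every buffer.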
for (uint32_t i=0; i<totalBuffers; ++i) {
buffers[i].owner = 0;
buffers[i].fence = NvSciSyncFenceInitializer;
}
// Launch appropriate thread
if (producer == 1) {
// Launch producer thread
pthread_t producerThread;
if (0 != pthread_create(&producerThread, NULL, producerFunc, &ret)) {
fprintf(stderr, "Failed to launch producer\n");
ret = 1;
goto deinit_IPC;
}
// Wait for thread to finish
(void)pthread_join(producerThread, NULL);
} else {
// Launch consumer thread
pthread_t consumerThread;
if (0 != pthread_create(&consumerThread, NULL, consumerFunc, &ret)) {
fprintf(stderr, "Failed to launch consumer\n");
ret = 1;
goto deinit_IPC;
}
// Wait for thread to finish
(void)pthread_join(consumerThread, NULL);
}
deinit_IPC:
ipcDeinit(&ipcWrapper);
(void)NvSciIpcDeinit();
close_buf_module:
(void)NvSciBufModuleClose(bufModule);
close_sync_module:
(void)NvSciSyncModuleClose(syncModule);
fprintf(stderr, "Sample completed\n");
return ret;
}
// Checksum calculation
#define CRC32_POLYNOMIAL 0xEDB88320L
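// This is the reflected CRC-32 polynomial, but the routine below uses a
// zero initial value and no final inversion, so its output differs from
// the standard CRC-32 checksum. That is harmless here: producer and
// consumer only compare values produced by this same function.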
uint32_t GenerateCRC(uint8_t* data_ptr,
uint32_t height,
uint32_t width,
uint32_t pitch)
{
uint32_t y = 0U, x = 0U;
uint32_t crc = 0U, tmp;
static uint32_t crcTable[256];
static int initialized = 0;
// Initialize CRC table, which is a one-time operation
if (!initialized) {
for (int i = 0; i <= 255; i++) {
tmp = i;
for (int j = 8; j > 0; j--) {
if (tmp & 1) {
tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL;
} else {
tmp >>= 1;
}
}
crcTable[i] = tmp;
}
initialized = 1;
}
// Calculate CRC for the data
for (y = 0U; y < height; y++) {
for (x = 0U; x < width; x++) {
tmp = (crc >> 8) & 0x00FFFFFFL;
crc = tmp ^ crcTable[((uint32_t) crc ^ *(data_ptr + x)) & 0xFF];
}
data_ptr += pitch;
}
return crc;
}

617
rawstream/rawstream_producer.c Normal file

@@ -0,0 +1,617 @@
/*
* Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation. Any
* use, reproduction, disclosure or distribution of this software and related
* documentation without an express license agreement from NVIDIA Corporation
* is strictly prohibited.
*/
#include "rawstream.h"
void* producerFunc(void* arg)
{
CudaClientInfo cudaInfo;
NvSciError sciErr;
int cudaErr;
// Pointers released in the cleanup path are declared and NULL-initialized
// up front, so they are defined even when an early goto skips the code
// that would otherwise first assign them.
void* sendWaitListDesc = NULL;
void* recvWaitListDesc = NULL;
void* sendObjAndListDesc = NULL;
void* recvObjAndListDesc = NULL;
void* consumerReadAttrsDesc = NULL;
void* sendBufListDesc = NULL;
NvSciSyncAttrList syncConflictAttrs = NULL;
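// arg points at main()'s exit status: preset it to failure here and
// clear it to 0 only if the full frame loop completes.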
*(int*)arg = 1;
fprintf(stderr, "Producer starting\n");
// Do common cuda initialization
if (!setupCuda(&cudaInfo)) {
goto done;
}
// Create an empty sync attribute list for signaling permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &producerSignalAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer signal attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to signal syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(producerSignalAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrSignal);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query signal attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Producer signal attributes established\n");
// Create an empty sync attribute list for waiting permissions.
sciErr = NvSciSyncAttrListCreate(syncModule, &producerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Query CUDA for attributes needed to wait for syncs
cudaErr = cudaDeviceGetNvSciSyncAttributes(producerWaitAttrs,
cudaInfo.deviceId,
cudaNvSciSyncAttrWait);
if (cudaSuccess != cudaErr) {
fprintf(stderr,
"Could not query wait attributes from CUDA (%d)\n",
cudaErr);
goto done;
}
fprintf(stderr, "Producer wait attributes established\n");
// Export producer's wait attributes to a form suitable for IPC
size_t sendWaitAttrListSize = 0U;
sciErr = NvSciSyncAttrListIpcExportUnreconciled(&producerWaitAttrs,
1,
ipcWrapper.endpoint,
&sendWaitListDesc,
&sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Send the size of the producer's wait attributes to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendWaitAttrListSize,
sizeof(sendWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send producer wait attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the producer's wait attributes
sciErr = ipcSend(&ipcWrapper,
sendWaitListDesc,
sendWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send producer wait attrs (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the consumer's wait attributes
size_t recvWaitAttrListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvWaitAttrListSize,
sizeof(recvWaitAttrListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer wait attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the consumer's wait attributes
recvWaitListDesc = malloc(recvWaitAttrListSize);
if (NULL == recvWaitListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync attr allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive consumer's wait attributes
sciErr = ipcRecvFill(&ipcWrapper,
recvWaitListDesc,
recvWaitAttrListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Convert the received consumer wait attributes to an attribute list
sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule,
ipcWrapper.endpoint,
recvWaitListDesc,
recvWaitAttrListSize,
&consumerWaitAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import consumer wait attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for producer to consumer signaling
NvSciSyncAttrList syncAllAttrs[2];
syncAllAttrs[0] = producerSignalAttrs;
syncAllAttrs[1] = consumerWaitAttrs;
sciErr = NvSciSyncAttrListReconcile(syncAllAttrs,
2,
&prodToConsAttrs,
&syncConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't merge producer->consumer attrs (%x)\n",
sciErr);
goto done;
}
// Allocate producer to consumer sync object
sciErr = NvSciSyncObjAlloc(prodToConsAttrs, &producerSignalObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't allocate producer->consumer sync (%x)\n",
sciErr);
goto done;
}
// Export sync attributes and object to a form suitable for IPC
size_t sendObjAndListSize = 0U;
sciErr = NvSciSyncIpcExportAttrListAndObj(producerSignalObj,
NvSciSyncAccessPerm_WaitOnly,
ipcWrapper.endpoint,
&sendObjAndListDesc,
&sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Send the size of the sync description to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send producer->consumer sync description size(%x)\n",
sciErr);
goto done;
}
// Send the sync description to the consumer
sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't send producer->consumer sync description (%x)\n",
sciErr);
goto done;
}
// Wait to receive the size of the consumer->producer sync desription
size_t recvObjAndListSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&recvObjAndListSize,
sizeof(size_t));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't recv consumer->produce sync description size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the description
recvObjAndListDesc = malloc(recvObjAndListSize);
if (NULL == recvObjAndListDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr,
"Sync description allocation failed (%x)\n",
sciErr);
goto done;
}
// Wait to receive consumer->producer sync description
sciErr = ipcRecvFill(&ipcWrapper,
recvObjAndListDesc,
recvObjAndListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't receive consumer->producer sync description (%x)\n",
sciErr);
goto done;
}
// Convert the received consumer->producer sync description to a
// sync attribute list and object
sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
ipcWrapper.endpoint,
recvObjAndListDesc,
recvObjAndListSize,
&producerWaitAttrs,
1,
NvSciSyncAccessPerm_WaitOnly,
1000U, // import timeout in microseconds
&producerWaitObj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't import consumer->producer sync (%x)\n", sciErr);
goto done;
}
// Set up CUDA sync objects, importing NvSciSync objects
if (!setupCudaSync(&cudaInfo, producerSignalObj, producerWaitObj)) {
goto done;
}
fprintf(stderr, "Producer exchanged sync objects with consumer\n");
// Create an empty buffer attribute list for producer buffers
sciErr = NvSciBufAttrListCreate(bufModule, &producerWriteAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to create producer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Fill producer buffer attribute list with values
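// The attributes request a raw 128 KiB buffer with 4 KiB alignment that
// is CPU-accessible, read/write, and usable by the GPU whose UUID was
// queried in setupCuda().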
NvSciBufAttrKeyValuePair bufKeyValue[6];
NvSciRmGpuId gpuId;
memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
bufKeyValue[0].value = &gpuId;
bufKeyValue[0].len = sizeof(gpuId);
NvSciBufType bufType = NvSciBufType_RawBuffer;
bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
bufKeyValue[1].value = &bufType;
bufKeyValue[1].len = sizeof(bufType);
NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_ReadWrite;
bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
bufKeyValue[2].value = &bufPerm;
bufKeyValue[2].len = sizeof(bufPerm);
bool bufAccessFlag = true;
bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
bufKeyValue[3].value = &bufAccessFlag;
bufKeyValue[3].len = sizeof(bufAccessFlag);
uint64_t rawsize = (128 * 1024);
bufKeyValue[4].key = NvSciBufRawBufferAttrKey_Size;
bufKeyValue[4].value = &rawsize;
bufKeyValue[4].len = sizeof(rawsize);
uint64_t align = (4 * 1024);
bufKeyValue[5].key = NvSciBufRawBufferAttrKey_Align;
bufKeyValue[5].value = &align;
bufKeyValue[5].len = sizeof(align);
sciErr = NvSciBufAttrListSetAttrs(producerWriteAttrs, bufKeyValue, 6);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to fill producer buffer attrs (%x)\n", sciErr);
goto done;
}
fprintf(stderr, "Producer buffer attributes established\n");
// Wait to receive the size of the consumer's buffer attributes
size_t consumerReadAttrsSize = 0U;
sciErr = ipcRecvFill(&ipcWrapper,
&consumerReadAttrsSize,
sizeof(consumerReadAttrsSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to recv consumer buffer attr size (%x)\n",
sciErr);
goto done;
}
// Allocate a buffer big enough to receive the consumer's buffer attributes
consumerReadAttrsDesc = malloc(consumerReadAttrsSize);
if (NULL == consumerReadAttrsDesc) {
sciErr = NvSciError_InsufficientMemory;
fprintf(stderr, "Buffer attr allocation failed (%x)\n", sciErr);
goto done;
}
// Wait to receive the consumer's buffer attributes
sciErr = ipcRecvFill(&ipcWrapper,
consumerReadAttrsDesc,
consumerReadAttrsSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to recv consumer buffer attrs (%x)\n", sciErr);
goto done;
}
// Convert the received consumer buffer attributes to an attribute list
sciErr = NvSciBufAttrListIpcImportUnreconciled(bufModule,
ipcWrapper.endpoint,
consumerReadAttrsDesc,
consumerReadAttrsSize,
&consumerReadAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to import consumer buffer attrs (%x)\n",
sciErr);
goto done;
}
// Get combined attributes for buffers
NvSciBufAttrList bufAllAttrs[2], bufConflictAttrs;
bufAllAttrs[0] = producerWriteAttrs;
bufAllAttrs[1] = consumerReadAttrs;
sciErr = NvSciBufAttrListReconcile(bufAllAttrs, 2,
&combinedBufAttrs, &bufConflictAttrs);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't merge buffer attrs (%x)\n", sciErr);
goto done;
}
// Export combined buffer attributes to a form suitable for IPC
size_t sendBufListSize = 0U;
sciErr = NvSciBufAttrListIpcExportReconciled(combinedBufAttrs,
ipcWrapper.endpoint,
&sendBufListDesc,
&sendBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Can't export reconciled buffer attrs to consumer (%x)\n",
sciErr);
goto done;
}
// Send the size of the combined buffer attributes to the consumer,
// so it knows how much data to expect
sciErr = ipcSend(&ipcWrapper,
&sendBufListSize,
sizeof(sendBufListSize));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to send combined buffer attrs size (%x)\n",
sciErr);
goto done;
}
// Send the exported form of the combined buffer attributes
sciErr = ipcSend(&ipcWrapper,
sendBufListDesc,
sendBufListSize);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send combined buffer attrs (%x)\n", sciErr);
goto done;
}
// Extract attributes needed by CUDA
if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
goto done;
}
// Allocate all buffers
for (uint32_t i=0U; i<totalBuffers; ++i) {
Buffer* buf = &buffers[i];
// Allocate the buffer
sciErr = NvSciBufObjAlloc(combinedBufAttrs, &buf->obj);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Can't allocate buffer %d (%x)\n", i, sciErr);
goto done;
}
// Export buffer object to a form suitable for IPC
// Note: Unlike attribute lists, the exported form of objects has
// a fixed size.
NvSciBufObjIpcExportDescriptor objDesc;
sciErr = NvSciBufObjIpcExport(buf->obj,
NvSciBufAccessPerm_ReadWrite,
ipcWrapper.endpoint,
&objDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Unable to export buffer %d object to consumer (%x)\n",
i, sciErr);
goto done;
}
// Send the buffer description to the consumer
sciErr = ipcSend(&ipcWrapper,
&objDesc,
sizeof(NvSciBufObjIpcExportDescriptor));
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to send buffer %d (%x)\n", i, sciErr);
goto done;
}
// Import the buffer into CUDA
if (!setupCudaBuffer(&cudaInfo, buf)) {
goto done;
}
}
fprintf(stderr, "Producer buffers established and transmitted\n");
// Send all frames
uint32_t currFrame = 0;
uint32_t currBuffer = 0;
Packet packet;
while (currFrame < totalFrames) {
fprintf(stderr, "Producer starting frame %d in buffer %d\n",
currFrame, currBuffer);
Buffer* buf = &buffers[currBuffer];
// Wait for buffer to be available
// Note: On first frame for each buffer, the producer already owns
// it, so this is skipped. On subsequent frames it must wait
// for the buffer's return.
while (buf->owner != 0U) {
// Wait for next returned buffer
sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to recv buffer from consumer (%x)\n",
sciErr);
goto done;
}
// Import transmitted fence description to a fence
sciErr = NvSciSyncIpcImportFence(producerWaitObj,
&packet.fenceDesc,
&buffers[packet.bufferId].fence);
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to import fence from consumer (%x)\n",
sciErr);
goto done;
}
// Extract checksum from packet
buffers[packet.bufferId].crc = packet.crc;
// Mark producer as owner of this buffer
buffers[packet.bufferId].owner = 0U;
}
// Wait for fence returned by consumer before rendering
if (!waitCudaFence(&cudaInfo, buf)) {
goto done;
}
// CUDA rendering to buffer
(void)memset(cudaInfo.bufCopy, (currFrame & 0xFF), cudaInfo.bufSize);
cudaErr = cudaMemcpy2DAsync(buf->ptr,
cudaInfo.bufSize,
cudaInfo.bufCopy,
cudaInfo.bufSize,
cudaInfo.bufSize,
1,
cudaMemcpyHostToDevice,
cudaInfo.stream);
if (cudaSuccess != cudaErr) {
fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr);
goto done;
}
// Generate new fence for the sync object
if (!signalCudaFence(&cudaInfo, buf)) {
goto done;
}
// Wait for operation to finish and compute checksum
// IMPORTANT NOTE:
// A normal stream application would not perform these steps.
// A checksum is not required for streaming, and waiting for
// operations to finish (which we only need because the
// checksum is calculated by the CPU) introduces bubbles
// in the hardware pipeline. A real application can rely on
// the generated NvSciSync fences for synchronization.
// These steps are only taken in this sample application
// because the consumer has no output visible to the user,
// so the checksum allows us to verify that the application
// is behaving properly.
cudaDeviceSynchronize();
buf->crc = GenerateCRC(cudaInfo.bufCopy,
1,
cudaInfo.bufSize,
cudaInfo.bufSize);
fprintf(stderr, "Producer wrote frame %d in buffer %d\n",
currFrame, currBuffer);
// Mark buffer as owned by consumer now
buf->owner = 1U;
// Export buffer index, checksum, and fence for transmission over IPC
packet.bufferId = currBuffer;
packet.crc = buf->crc;
sciErr = NvSciSyncIpcExportFence(&buf->fence,
ipcWrapper.endpoint,
&packet.fenceDesc);
if (NvSciError_Success != sciErr) {
fprintf(stderr, "Unable to export producer fence (%x)\n", sciErr);
goto done;
}
// Send buffer index and fence to consumer
sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet));
if (NvSciError_Success != sciErr) {
fprintf(stderr,
"Failure to send buffer to consumer (%x)\n",
sciErr);
goto done;
}
fprintf(stderr, "Producer finished frame %d in buffer %d\n",
currFrame, currBuffer);
// Advance buffer and frame
currBuffer = (currBuffer + 1U) % totalBuffers;
currFrame++;
}
// Success
*(int*)arg = 0;
done:
// Free CUDA resources
deinitCudaBuffer(buffers, totalBuffers);
deinitCuda(&cudaInfo);
// Free NvSci objects
if (NULL != producerSignalAttrs)
NvSciSyncAttrListFree(producerSignalAttrs);
if (NULL != consumerWaitAttrs)
NvSciSyncAttrListFree(consumerWaitAttrs);
if (NULL != sendWaitListDesc)
NvSciSyncAttrListFreeDesc(sendWaitListDesc);
if (NULL != producerWaitAttrs)
NvSciSyncAttrListFree(producerWaitAttrs);
if (NULL != prodToConsAttrs)
NvSciSyncAttrListFree(prodToConsAttrs);
if (NULL != syncConflictAttrs)
NvSciSyncAttrListFree(syncConflictAttrs);
if (NULL != producerSignalObj)
NvSciSyncObjFree(producerSignalObj);
if (NULL != sendObjAndListDesc)
NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc);
if (NULL != producerWaitObj)
NvSciSyncObjFree(producerWaitObj);
if (NULL != producerWriteAttrs)
NvSciBufAttrListFree(producerWriteAttrs);
if (NULL != consumerReadAttrs)
NvSciBufAttrListFree(consumerReadAttrs);
if (NULL != combinedBufAttrs)
NvSciBufAttrListFree(combinedBufAttrs);
if (NULL != sendBufListDesc)
NvSciBufAttrListFreeDesc(sendBufListDesc);
// Free malloc'd resources
if (NULL != recvWaitListDesc)
free(recvWaitListDesc);
if (NULL != recvObjAndListDesc)
free(recvObjAndListDesc);
if (NULL != consumerReadAttrsDesc)
free(consumerReadAttrsDesc);
fprintf(stderr, "Producer exiting\n");
return NULL;
}