From a3f1b7ea3396b05883518ef262da7d5e77d803f9 Mon Sep 17 00:00:00 2001 From: svcmobrel-release Date: Fri, 19 Sep 2025 10:10:49 -0700 Subject: [PATCH] Updating prebuilts and/or headers 2eba699906039d6615aae4967f6ea79bfe44a40a - event_sample_app/block_pool.c f3abb0a884f0647204ad32ff51255c4712e52120 - event_sample_app/Makefile 9ee49033e077ac5c8bf458a04c91dd3dbed9633d - event_sample_app/event_loop.h b33adce6eb1bbc7af23f6c37b6a635479e18a66a - event_sample_app/block_returnsync.c a56041c06b6bc1d3812b72b399d7d78dd7895485 - event_sample_app/block_limiter.c ca34c957759f7a010f0cbbbf9bedc03a2c98092b - event_sample_app/block_c2c.c 8d6d0ec3aa8e374a1d2a5fedc9dd24ff7bbdb731 - event_sample_app/block_multicast.c a76149a2531899e35843d939f60ad8979d8cf65f - event_sample_app/block_consumer_uc1.c 9da8763e4af4b4b7278507a3ebfe2c68a7a24585 - event_sample_app/util.h 2bf7e1383d6e8913c9b0a6a8bdd48fe63d8098d0 - event_sample_app/block_producer_uc1.c a54abf82eaa2d888e379ab4596ba68ce264e80b5 - event_sample_app/block_info.h 080a6efe263be076c7046e70e31098c2bbed0f6d - event_sample_app/block_presentsync.c 7dd10e5ea71f0c4a09bbe1f9f148f67a13ee098c - event_sample_app/util.c bc1a6f9017b28e5707c166a658a35e6b3986fdf4 - event_sample_app/usecase1.h 317f43efc59638bf1eae8303f0c79eafb059241a - event_sample_app/block_ipc.c 40361c8f0b68f7d5207db2466ce08c19c0bf1c90 - event_sample_app/event_loop_service.c efad113d0107e5d8f90146f3102a7c0ed22f1a35 - event_sample_app/event_loop_threads.c 2908615cebcf36330b9850c57e8745bf324867b2 - event_sample_app/block_queue.c 36ed68eca1a7800cf0d94e763c9fc352ee8cda1e - event_sample_app/block_common.c 675f75d61bd0226625a8eaaf0e503c9e976c8d61 - event_sample_app/main.c c3b26619dd07d221e953fc5dc29a50dcb95a8b97 - rawstream/Makefile 1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c 1d96498fe3c922f143f7e50e0a32b099714060ad - rawstream/rawstream_consumer.c d077dafc9176686f6d081026225325c2a303a60e - rawstream/rawstream_producer.c 54ae655edddda7dcabe22fbf0b27c3f617978851 - rawstream/rawstream.h d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c 81e3d6f8ff5252797a7e9e170b74df6255f54f1b - rawstream/rawstream_main.c Change-Id: I0f4e671693eb0addfe8d0e6532cc8f240cb6c778 --- commitFile.txt | 29 + event_sample_app/00README.txt | 258 +++++ event_sample_app/Makefile | 146 +++ event_sample_app/block_c2c.c | 170 ++++ event_sample_app/block_common.c | 279 ++++++ event_sample_app/block_consumer_uc1.c | 1200 ++++++++++++++++++++++ event_sample_app/block_info.h | 284 ++++++ event_sample_app/block_ipc.c | 199 ++++ event_sample_app/block_limiter.c | 56 ++ event_sample_app/block_multicast.c | 57 ++ event_sample_app/block_pool.c | 743 ++++++++++++++ event_sample_app/block_presentsync.c | 55 + event_sample_app/block_producer_uc1.c | 1339 +++++++++++++++++++++++++ event_sample_app/block_queue.c | 57 ++ event_sample_app/block_returnsync.c | 55 + event_sample_app/event_loop.h | 51 + event_sample_app/event_loop_service.c | 339 +++++++ event_sample_app/event_loop_threads.c | 129 +++ event_sample_app/main.c | 1287 ++++++++++++++++++++++++ event_sample_app/usecase1.h | 55 + event_sample_app/util.c | 66 ++ event_sample_app/util.h | 33 + push_info.txt | 1 + rawstream/Makefile | 126 +++ rawstream/README.txt | 41 + rawstream/rawstream.h | 152 +++ rawstream/rawstream_consumer.c | 670 +++++++++++++ rawstream/rawstream_cuda.c | 234 +++++ rawstream/rawstream_ipc_linux.c | 172 ++++ rawstream/rawstream_main.c | 230 +++++ rawstream/rawstream_producer.c | 752 ++++++++++++++ 31 files changed, 9265 insertions(+) 
create mode 100644 commitFile.txt create mode 100644 event_sample_app/00README.txt create mode 100644 event_sample_app/Makefile create mode 100644 event_sample_app/block_c2c.c create mode 100644 event_sample_app/block_common.c create mode 100644 event_sample_app/block_consumer_uc1.c create mode 100644 event_sample_app/block_info.h create mode 100644 event_sample_app/block_ipc.c create mode 100644 event_sample_app/block_limiter.c create mode 100644 event_sample_app/block_multicast.c create mode 100644 event_sample_app/block_pool.c create mode 100644 event_sample_app/block_presentsync.c create mode 100644 event_sample_app/block_producer_uc1.c create mode 100644 event_sample_app/block_queue.c create mode 100644 event_sample_app/block_returnsync.c create mode 100644 event_sample_app/event_loop.h create mode 100644 event_sample_app/event_loop_service.c create mode 100644 event_sample_app/event_loop_threads.c create mode 100644 event_sample_app/main.c create mode 100644 event_sample_app/usecase1.h create mode 100644 event_sample_app/util.c create mode 100644 event_sample_app/util.h create mode 100644 push_info.txt create mode 100644 rawstream/Makefile create mode 100644 rawstream/README.txt create mode 100644 rawstream/rawstream.h create mode 100644 rawstream/rawstream_consumer.c create mode 100644 rawstream/rawstream_cuda.c create mode 100644 rawstream/rawstream_ipc_linux.c create mode 100644 rawstream/rawstream_main.c create mode 100644 rawstream/rawstream_producer.c diff --git a/commitFile.txt b/commitFile.txt new file mode 100644 index 0000000..fcc85d9 --- /dev/null +++ b/commitFile.txt @@ -0,0 +1,29 @@ +Updating prebuilts and/or headers + +2eba699906039d6615aae4967f6ea79bfe44a40a - event_sample_app/block_pool.c +f3abb0a884f0647204ad32ff51255c4712e52120 - event_sample_app/Makefile +9ee49033e077ac5c8bf458a04c91dd3dbed9633d - event_sample_app/event_loop.h +b33adce6eb1bbc7af23f6c37b6a635479e18a66a - event_sample_app/block_returnsync.c +a56041c06b6bc1d3812b72b399d7d78dd7895485 - event_sample_app/block_limiter.c +ca34c957759f7a010f0cbbbf9bedc03a2c98092b - event_sample_app/block_c2c.c +8d6d0ec3aa8e374a1d2a5fedc9dd24ff7bbdb731 - event_sample_app/block_multicast.c +a76149a2531899e35843d939f60ad8979d8cf65f - event_sample_app/block_consumer_uc1.c +9da8763e4af4b4b7278507a3ebfe2c68a7a24585 - event_sample_app/util.h +2bf7e1383d6e8913c9b0a6a8bdd48fe63d8098d0 - event_sample_app/block_producer_uc1.c +a54abf82eaa2d888e379ab4596ba68ce264e80b5 - event_sample_app/block_info.h +080a6efe263be076c7046e70e31098c2bbed0f6d - event_sample_app/block_presentsync.c +7dd10e5ea71f0c4a09bbe1f9f148f67a13ee098c - event_sample_app/util.c +bc1a6f9017b28e5707c166a658a35e6b3986fdf4 - event_sample_app/usecase1.h +317f43efc59638bf1eae8303f0c79eafb059241a - event_sample_app/block_ipc.c +40361c8f0b68f7d5207db2466ce08c19c0bf1c90 - event_sample_app/event_loop_service.c +efad113d0107e5d8f90146f3102a7c0ed22f1a35 - event_sample_app/event_loop_threads.c +2908615cebcf36330b9850c57e8745bf324867b2 - event_sample_app/block_queue.c +36ed68eca1a7800cf0d94e763c9fc352ee8cda1e - event_sample_app/block_common.c +675f75d61bd0226625a8eaaf0e503c9e976c8d61 - event_sample_app/main.c +c3b26619dd07d221e953fc5dc29a50dcb95a8b97 - rawstream/Makefile +1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c +1d96498fe3c922f143f7e50e0a32b099714060ad - rawstream/rawstream_consumer.c +d077dafc9176686f6d081026225325c2a303a60e - rawstream/rawstream_producer.c +54ae655edddda7dcabe22fbf0b27c3f617978851 - rawstream/rawstream.h 
+d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c +81e3d6f8ff5252797a7e9e170b74df6255f54f1b - rawstream/rawstream_main.c diff --git a/event_sample_app/00README.txt b/event_sample_app/00README.txt new file mode 100644 index 0000000..bb4b6e7 --- /dev/null +++ b/event_sample_app/00README.txt @@ -0,0 +1,258 @@ +SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: LicenseRef-NvidiaProprietary + +NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +property and proprietary rights in and to this material, related +documentation and any modifications thereto. Any use, reproduction, +disclosure or distribution of this material and related documentation +without an express license agreement from NVIDIA CORPORATION or +its affiliates is strictly prohibited. + +NvSciStream Event Loop Driven Sample App - README + +--- +# nvscistream_event_sample - NvSciStream Sample App + +## Description + +This directory contains an NvSciStream sample application that +supports a variety of use cases, using an event-loop driven model. +Once the stream is fully connected, all further setup and streaming +operations are triggered by events, processed either by a single +NvSciEvent-driven thread or separate threads which wait for events +on each block. The former is the preferred approach for implementing +NvSciStream applications. In addition to those events which NvSci +itself generates, any other event which can be bound to an NvSciEvent +can be added to the event loop. This allows for robust applications +which can handle events regardless of the order in which they occur. + +To use this sample for writing your own applications: + +* See main.c for examples of how to do top level application setup and + how to select the blocks needed for your use case and connect them + all together. +* See the descriptions in the usecase*.h files to determine which use cases + involve the producer and consumer engines that you are interested in. +* See the appropriate block_*.c files for examples of creating the + necessary blocks and handling the events that they encounter. + See the block_producer_*.c and block_consumer_*.c files for examples of how + to map the relevant engines to and from NvSci. +* See the appropriate event_loop_*.c file for your chosen event handling + method. + +## Build the application + +The NvSciStream sample includes source code and a Makefile. +Navigate to the sample application directory to build the application: + + make clean + make + +## Examples of how to run the sample application: + +* NOTE: +* Inter-process and inter-chip test cases must be run with sudo. +* NvMedia/CUDA stream (use case 2) of the sample application is not supported + on x86 and Jetson Linux devices. +* Inter-chip use cases are not supported on Jetson Linux devices. +* Update the NvIpc/PCIe endpoint accordingly. 
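+* Options from the examples below can be combined; for instance, a
+  single-process stream with two consumers, where the second consumer
+  uses the mailbox mode (an illustrative combination of documented options):
+
+      ./nvscistream_event_sample -m 2 -q 1 m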
+
+Single-process, single-consumer CUDA/CUDA stream that uses the default event
+service:
+
+    ./nvscistream_event_sample
+
+Single-process, single-consumer stream that uses the threaded event handling:
+
+    ./nvscistream_event_sample -e t
+
+Single-process NvMedia/CUDA stream with yuv format:
+    ./nvscistream_event_sample -u 2 -s y
+
+Single-process NvMedia/CUDA stream with three consumers, where the second uses
+the mailbox mode:
+
+    ./nvscistream_event_sample -u 2 -m 3 -q 1 m
+
+Multi-process CUDA/CUDA stream with three consumers, one in the same
+process as the producer, and the other two in separate processes. The
+first and the third consumers use the mailbox mode:
+
+    ./nvscistream_event_sample -m 3 -p -c 0 -q 0 m &
+    ./nvscistream_event_sample -c 1 -c 2 -q 2 m
+
+Multi-process CUDA/CUDA stream with three consumers, one in the same
+process as the producer, and the other two in separate processes.
+To simulate the case with a less trusted consumer, one of the consumer
+processes is set with lower priority. A limiter block is used to restrict
+this consumer to hold at most one packet. The total number of packets is
+increased to five.
+
+Linux example:
+
+    ./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
+    ./nvscistream_event_sample -c 1 &
+    nice -n 19 ./nvscistream_event_sample -c 2 &
+    # Makes the third process as nice as possible.
+
+QNX example:
+
+    ./nvscistream_event_sample -m 3 -f 5 -p -c 0 -l 2 1 &
+    ./nvscistream_event_sample -c 1 &
+    nice -n 1 ./nvscistream_event_sample -c 2 &
+    # Reduces the priority level of the third process by 1.
+
+Multi-process CUDA/CUDA stream with two consumers, one in the same
+process as the producer, and the other in a separate process. Both
+processes enable the endpoint information option:
+
+    ./nvscistream_event_sample -m 2 -p -c 0 -i &
+    ./nvscistream_event_sample -c 1 -i
+
+Multi-process CUDA/CUDA stream with extra validation steps for an ASIL-D
+process (not supported on x86 or Jetson Linux devices):
+    ./nvscistream_event_sample -u 3 -p &
+    ./nvscistream_event_sample -u 3 -c 0
+
+Multi-process CUDA/CUDA stream using an external event service to handle
+internal I/O messages across the process boundary:
+    ./nvscistream_event_sample -p -E &
+    ./nvscistream_event_sample -c 0 -E
+
+Multi-process CUDA/CUDA stream with one consumer on another SoC.
+The consumer has the FIFO queue attached to the C2C IpcSrc block, and
+a three-packet pool attached to the C2C IpcDst block. It uses IPC channel
+nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for C2C communication.
+
+    ./nvscistream_event_sample -P 0 nvscic2c_pcie_s0_c5_1 -Q 0 f
+    # Run the below command on another OS running on the peer SoC.
+    ./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -F 0 3
+
+Multi-process CUDA/CUDA stream with four consumers, one in the same
+process as the producer, one in another process but in the same OS as the
+producer, and two in another process on another OS running on a peer SoC.
+The third and fourth consumers have a mailbox queue attached to the C2C
+IpcSrc block, and a five-packet pool attached to the C2C IpcDst block.
+The third consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
+C2C communication. The fourth consumer uses nvscic2c_pcie_s0_c5_2 <->
+nvscic2c_pcie_s0_c6_2 for C2C communication.
+
+    ./nvscistream_event_sample -m 4 -c 0 -q 0 m -Q 2 m -Q 3 m -P 2 nvscic2c_pcie_s0_c5_1 -P 3 nvscic2c_pcie_s0_c5_2 &
+    ./nvscistream_event_sample -c 1 -q 1 m
+    # Run the below command on another OS running on the peer SoC.
+    ./nvscistream_event_sample -C 2 nvscic2c_pcie_s0_c6_1 -q 2 f -F 2 5 -C 3 nvscic2c_pcie_s0_c6_2 -q 3 m -F 3 5
+
+#Example commands for inter-process late attach usecase
+Multi-process CUDA/CUDA stream with one early consumer and one late-attached
+consumer. The producer and early consumer processes are configured to stream
+100000 frames, whereas the late-attached consumer process is configured to
+receive 10000 frames.
+    # Run the below commands to launch the producer and early consumer processes.
+    ./nvscistream_event_sample -m 2 -r 1 -p &
+    ./nvscistream_event_sample -c 0 -k 0 100000 &
+    # Run the below command after some delay to launch the late-attached consumer process.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -c 1 -k 1 10000 &
+
+Multi-process CUDA/CUDA stream with one early consumer and two late-attached
+consumers. The producer and early consumer processes are configured to stream
+100000 frames, whereas late-attached consumer process one is configured to
+receive 10000 frames and late-attached consumer process two is configured to
+receive 50000 frames.
+    # Run the below commands to launch the producer and early consumer processes.
+    ./nvscistream_event_sample -m 3 -r 2 -p &
+    ./nvscistream_event_sample -c 0 -k 0 100000 &
+    # Run the below command after some delay to launch late-attached consumer process one.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -c 1 -k 1 10000 &
+    # Run the below command after some delay to launch late-attached consumer process two.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -c 2 -k 2 50000 &
+
+#Example commands for inter-process re-attach usecase
+Multi-process CUDA/CUDA stream with one early consumer and two late-attached
+consumers. The producer and early consumer processes are configured to stream
+100000 frames, whereas late-attached consumer process one is configured to
+receive 10000 frames and late-attached consumer process two is configured to
+receive 50000 frames.
+Once late-attached consumer process one completes streaming, re-attach it to
+receive 5000 frames.
+    # Run the below commands to launch the producer and early consumer processes.
+    ./nvscistream_event_sample -m 3 -r 2 -p &
+    ./nvscistream_event_sample -c 0 -k 0 100000 &
+    # Run the below command after some delay to launch late-attached consumer process one.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -c 1 -k 1 10000 &
+    # Run the below command after some delay to launch late-attached consumer process two.
+    sleep 1;
+    ./nvscistream_event_sample -L -c 2 -k 2 50000 &
+    # After late-attached consumer process one completes, re-attach it.
+    ./nvscistream_event_sample -L -c 1 -k 1 5000 &
+
+Limitations with C2C late/re-attach:
+This sample app has the following limitations.
+1. For C2C late/re-attach, this sample app does not support the IPC consumer
+being the only early consumer with all remaining consumers as C2C
+late-attached. This is because the logic for setting static attributes for
+late attach has not been added.
+2. A C2C consumer can act as an IPC consumer during late-/re-attach, but an
+IPC consumer cannot be made a C2C consumer during late-/re-attach.
+
+#Example commands for inter-chip late attach usecase
+Multi-process CUDA/CUDA stream with one early C2C consumer and one C2C
+late-attached consumer. The producer and early C2C consumer processes are
+configured to stream 100000 frames, whereas the late-attached C2C consumer
+process is configured to receive 10000 frames.
+The early consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
+C2C communication. The late-attached consumer uses nvscic2c_pcie_s0_c5_2 <->
+nvscic2c_pcie_s0_c6_2 for C2C communication.
+
+    # Run the below command to launch the producer on SOC1.
+    ./nvscistream_event_sample -m 2 -r 1 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 &
+    # Run the below command to launch the early consumer process on SOC2.
+    ./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
+    # Run the below command after some delay to launch the late-attached consumer process on SOC2.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
+
+Multi-process CUDA/CUDA stream with one early C2C consumer and two C2C
+late-attached consumers. The producer and early C2C consumer processes are
+configured to stream 100000 frames, whereas late-attached C2C consumer
+process one is configured to receive 10000 frames and late-attached C2C
+consumer process two is configured to receive 10000 frames.
+The early consumer uses nvscic2c_pcie_s0_c5_1 <-> nvscic2c_pcie_s0_c6_1 for
+C2C communication. Late-attached consumer one uses nvscic2c_pcie_s0_c5_2 <->
+nvscic2c_pcie_s0_c6_2 for C2C communication and late-attached consumer two
+uses nvscic2c_pcie_s0_c5_3 <-> nvscic2c_pcie_s0_c6_3 for C2C communication.
+
+    # Run the below command to launch the producer on SOC1.
+    ./nvscistream_event_sample -m 3 -r 2 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 -P 2 nvscic2c_pcie_s0_c5_3 &
+    # Run the below command to launch the early consumer process on SOC2.
+    ./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
+    # Run the below command after some delay to launch late-attached consumer process one.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
+    # Run the below command after some delay to launch late-attached consumer process two.
+    sleep 1;
+    ./nvscistream_event_sample -L -C 2 nvscic2c_pcie_s0_c6_3 -k 2 10000 &
+
+#Example commands for inter-chip/process re-attach usecase
+Multi-process CUDA/CUDA stream with one early consumer and two late-attached
+consumers. The producer and early consumer processes are configured to stream
+100000 frames, whereas late-attached consumer process one is configured to
+receive 10000 frames and late-attached consumer process two is configured to
+receive 50000 frames.
+Once late-attached consumer process one completes streaming, re-attach it to
+receive 5000 frames.
+Once late-attached consumer process two completes streaming, re-attach it as
+an IPC consumer to receive 5000 frames.
+
+    # Run the below command to launch the producer on SOC1.
+    ./nvscistream_event_sample -m 3 -r 2 -P 0 nvscic2c_pcie_s0_c5_1 -P 1 nvscic2c_pcie_s0_c5_2 -P 2 nvscic2c_pcie_s0_c5_3 &
+    # Run the below command to launch the early consumer process on SOC2.
+    ./nvscistream_event_sample -C 0 nvscic2c_pcie_s0_c6_1 -k 0 100000 &
+    # Run the below command after some delay to launch late-attached consumer process one.
+    sleep 1; # This 1s delay will let the producer and consumer enter the streaming phase.
+    ./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 10000 &
+    # Run the below command after some delay to launch late-attached consumer process two.
+    sleep 1;
+    ./nvscistream_event_sample -L -C 2 nvscic2c_pcie_s0_c6_3 -k 2 50000 &
+    # Once late-attached consumer process one completes streaming,
+    # re-attach it to receive 5000 frames.
+    ./nvscistream_event_sample -L -C 1 nvscic2c_pcie_s0_c6_2 -k 1 5000 &
+    # Once late-attached consumer process two completes streaming,
+    # re-attach it as an IPC consumer on SOC1 to receive 5000 frames.
+    ./nvscistream_event_sample -L -c 2 -k 2 5000 &
diff --git a/event_sample_app/Makefile b/event_sample_app/Makefile
new file mode 100644
index 0000000..869e21c
--- /dev/null
+++ b/event_sample_app/Makefile
@@ -0,0 +1,146 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+#
+
+# Location of common libraries
+LIB_DIR = /usr/lib/aarch64-linux-gnu
+# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
+TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
+
+# Location of the CUDA Toolkit
+CUDA_PATH ?= /usr/local/cuda
+
+# Location of NVSCI header
+NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
+NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
+
+GCC ?= g++
+NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
+
+# internal flags
+NVCCFLAGS :=
+CCFLAGS :=
+LDFLAGS :=
+
+# Extra user flags
+EXTRA_NVCCFLAGS ?=
+EXTRA_LDFLAGS ?=
+EXTRA_CCFLAGS ?=
+
+override abi := aarch64
+LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
+
+# Debug build flags
+dbg = 0
+ifeq ($(dbg),1)
+    NVCCFLAGS += -g -G
+    TARGET := debug
+else
+    TARGET := release
+endif
+
+ALL_CCFLAGS :=
+ALL_CCFLAGS += $(NVCCFLAGS)
+ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
+
+ALL_LDFLAGS :=
+ALL_LDFLAGS += $(ALL_CCFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
+
+################################################################################
+
+# Common includes and paths
+INCLUDES := -I./
+LIBRARIES := -L$(LIB_DIR)
+
+CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
+CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
+CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
+
+CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
+ifeq ("$(CUDALIB)","")
+    $(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
+else + CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" ) + LIBRARIES += -L$(CUDALIB) -lcuda -lrt +endif + +# Includes and paths for NVSCI libraries +NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null) +NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null) +NVSCISTREAMHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscistream.h -print 2>/dev/null) +NVSCIEVENTHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscievent.h -print 2>/dev/null) +NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null) + +ifeq ("$(NVSCIBUFHEADER)","") + $(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCISYNCHEADER)","") + $(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCISTREAMHEADER)","") + $(error ERROR - nvscistream.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCIEVENTHEADER)","") + $(error ERROR - nvscievent.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCIIPCHEADER)","") + $(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR)) +endif + +INCLUDES += -I$(NVSCI_HEADER_DIR) +LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream + +ALL_CCFLAGS += --std=c++11 --threads 0 + +################################################################################ + +# Target rules +OUTPUT := nvscistream_event_sample + +all: build + +build: $(OUTPUT) + +OBJ := main.o +OBJ += block_common.o +OBJ += block_c2c.o +OBJ += block_consumer_uc1.o +OBJ += block_ipc.o +OBJ += block_limiter.o +OBJ += block_multicast.o +OBJ += block_pool.o +OBJ += block_presentsync.o +OBJ += block_producer_uc1.o +OBJ += block_queue.o +OBJ += block_returnsync.o +OBJ += event_loop_service.o +OBJ += event_loop_threads.o +OBJ += util.o + + +%.o: %.c + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) -o $@ -c $< + +$(OUTPUT): $(OBJ) + $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + $(OUTPUT) + +testrun: build + +clean: + rm -f $(OBJ) $(OUTPUT) + +clobber: clean diff --git a/event_sample_app/block_c2c.c b/event_sample_app/block_c2c.c new file mode 100644 index 0000000..2ce75cc --- /dev/null +++ b/event_sample_app/block_c2c.c @@ -0,0 +1,170 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. 
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - C2C block
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "nvsciipc.h"
+#include "nvscistream.h"
+#include "block_info.h"
+#include "event_loop.h"
+
+/* Create and register a new C2C src block */
+int32_t createC2cSrc(
+    NvSciStreamBlock* c2cSrc,
+    const char* channel,
+    NvSciStreamBlock queue)
+{
+    NvSciError err;
+    uint32_t i;
+    uint32_t slot = 0;
+
+    /* Create a data structure to track the block's status */
+    BlockData* blockData = createCommon("C2cSrc", 0);
+    if (NULL == blockData) {
+        return 0;
+    }
+
+    for (i=0; i< MAX_CONSUMERS; i++) {
+        if (!strcmp(ipcEP[i].c2cChannel, channel)) {
+            slot = i;
+            break;
+        }
+    }
+
+    /* Open the named channel */
+    err = NvSciIpcOpenEndpoint(channel, &ipcEP[slot].c2cEndpoint);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to open channel (%s) for C2C src\n",
+               err, channel);
+        deleteCommon(blockData);
+        return 0;
+    }
+    err = NvSciIpcResetEndpointSafe(ipcEP[slot].c2cEndpoint);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to reset IPC endpoint\n", err);
+    }
+
+    /* Create a C2C src block */
+    err = NvSciStreamIpcSrcCreate2(ipcEP[slot].c2cEndpoint,
+                                   sciSyncModule,
+                                   sciBufModule,
+                                   queue,
+                                   &blockData->block);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to create C2C src block\n", err);
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    /* Register block with event handling mechanism */
+    if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    *c2cSrc = blockData->block;
+    ipcEP[slot].ipcBlock = blockData->block;
+    ipcEP[slot].c2cOpened = true;
+    ipcEP[slot].c2cConnected = true;
+
+    return 1;
+}
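+
+/*
+ * Usage sketch (illustrative only; the real block wiring for each use
+ * case is done in main.c). "queue" is assumed here to be a queue block
+ * that was created beforehand:
+ *
+ *   NvSciStreamBlock c2cSrc;
+ *   if (!createC2cSrc(&c2cSrc, "nvscic2c_pcie_s0_c5_1", queue)) {
+ *       // handle failure; createC2cSrc returns 0 on error
+ *   }
+ */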
+
+/* Create and register a new C2C src block,
+ * using an endpoint that has already been opened
+ */
+int32_t createC2cSrc2(
+    NvSciStreamBlock* c2cSrc,
+    NvSciIpcEndpoint endpoint,
+    NvSciStreamBlock queue)
+{
+    NvSciError err;
+
+    /* Create a data structure to track the block's status */
+    BlockData* blockData = createCommon("C2cSrc", 0);
+    if (NULL == blockData) {
+        return 0;
+    }
+
+    /* Create a C2C src block */
+    err = NvSciStreamIpcSrcCreate2(endpoint,
+                                   sciSyncModule,
+                                   sciBufModule,
+                                   queue,
+                                   &blockData->block);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to create C2C src block\n", err);
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    /* Register block with event handling mechanism */
+    if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    *c2cSrc = blockData->block;
+    return 1;
+}
+
+/* Create and register a new C2C dst block */
+int32_t createC2cDst(
+    NvSciStreamBlock* c2cDst,
+    const char* channel,
+    NvSciStreamBlock pool)
+{
+    NvSciError err;
+
+    /* Create a data structure to track the block's status */
+    BlockData* blockData = createCommon("C2cDst", 0);
+    if (NULL == blockData) {
+        return 0;
+    }
+
+    /* Open the named channel */
+    err = NvSciIpcOpenEndpoint(channel, &ipcEP[0].ipcEndpoint);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to open channel (%s) for C2C dst\n",
+               err, channel);
+        deleteCommon(blockData);
+        return 0;
+    }
+    err = NvSciIpcResetEndpointSafe(ipcEP[0].ipcEndpoint);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to reset IPC endpoint\n", err);
+    }
+
+    /* Create a C2C dst block */
+    err = NvSciStreamIpcDstCreate2(ipcEP[0].ipcEndpoint,
+                                   sciSyncModule,
+                                   sciBufModule,
+                                   pool,
+                                   &blockData->block);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to create C2C dst block\n", err);
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    /* Register block with event handling mechanism */
+    if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    *c2cDst = blockData->block;
+    return 1;
+}
diff --git a/event_sample_app/block_common.c b/event_sample_app/block_common.c
new file mode 100644
index 0000000..3922068
--- /dev/null
+++ b/event_sample_app/block_common.c
@@ -0,0 +1,279 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - common block event handling
+ *
+ * Block types which do not require type-specific interactions make use of
+ * this common code.
+ */
+
+#include <stdlib.h>
+#if (QNX == 1)
+#include <sys/neutrino.h>
+#endif
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "nvscistream.h"
+#include "block_info.h"
+#include "event_loop.h"
+
+/* Indicates whether the thread handling the
+ * late consumer connections has been started
+ */
+static bool threadStarted = false;
+
+/* Delete common block */
+void deleteCommon(
+    void* data)
+{
+    BlockData* blockData = (BlockData*)data;
+
+    /* Destroy block */
+    if (blockData->block != 0) {
+        (void)NvSciStreamBlockDelete(blockData->block);
+    }
+
+    /* Check if it is late/re-attach usecase */
+    if (opts.numLateConsumer > 0U) {
+        /* Close the endpoints used by the IpcSrc/C2CSrc
+         * blocks for next late-/re-attach consumer connection
+         */
+        pthread_mutex_lock(&mutex);
+        if ((!strcmp(blockData->name, "IpcSrc")) ||
+            (!strcmp(blockData->name, "C2cSrc"))) {
+            for (uint32_t i=0; i< MAX_CONSUMERS; i++) {
+                if (ipcEP[i].ipcBlock == blockData->block) {
+                    /* close the Ipc endpoint */
+                    if (ipcEP[i].ipcEndpoint) {
+#if (QNX == 1)
+                        if (ipcEP[i].coid != 0) {
+                            (void)ConnectDetach_r(ipcEP[i].coid);
+                            ipcEP[i].coid = 0;
+                        }
+                        if (ipcEP[i].chid != 0) {
+                            (void)ChannelDestroy_r(ipcEP[i].chid);
+                            ipcEP[i].chid = 0;
+                        }
+#endif
+                        if (NvSciError_Success !=
+                            NvSciIpcCloseEndpointSafe(ipcEP[i].ipcEndpoint, false)) {
+                            printf("Failed to close ipc endpoint\n");
+                        }
+                        sleep(2);
+                        ipcEP[i].ipcEndpoint = 0U;
+                    }
+                    /* close the C2C endpoint */
+                    if (ipcEP[i].c2cEndpoint) {
+                        if (NvSciError_Success !=
+                            NvSciIpcCloseEndpointSafe(ipcEP[i].c2cEndpoint, false)) {
+                            printf("Failed to close C2C endpoint\n");
+                        }
+                        ipcEP[i].c2cEndpoint = 0U;
+                    }
+
+                    /* clear the information as this is needed
+                     * for the next late-/re-attach connection
+                     */
+                    ipcEP[i].ipcBlock = 0U;
+                    ipcEP[i].ipcConnected = false;
+                    ipcEP[i].c2cConnected = false;
+                    ipcEP[i].ipcOpened = false;
+                    ipcEP[i].c2cOpened = false;
+                    break;
+                }
+            }
+            /* Wakeup the thread to handle the next set of
+             * late-/re-attach consumer connections
+             */
+            pthread_cond_signal(&cond);
+        }
+        pthread_mutex_unlock(&mutex);
+    }
+
+    /* Free data */
+    free(blockData);
+}
+
+/* Handle events on a common block
+ *
+ * Blocks that don't require interaction after connection usually just
+ * receive notification of connection and disconnection.
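+ *
+ * Return convention used by this handler (matching the code below):
+ * 0 = no event within the timeout, 1 = event handled, 2 = final event
+ * (the block is cleaned up), negative = fatal error (the block is
+ * cleaned up).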
+ */ +int32_t handleCommon( + void* data, + uint32_t wait) +{ + /* Cast to common data */ + BlockData* blockData = (BlockData*)data; + + /* Get time to wait */ + int64_t waitTime = wait ? blockData->waitTime : 0; + + /* Query/wait for an event on the block */ + + NvSciStreamEventType event; + NvSciError err; + err = NvSciStreamBlockEventQuery(blockData->block, waitTime, &event); + + /* Handle errors */ + if (NvSciError_Success != err) { + + /* If not asked to wait, a timeout is not an error */ + if (!waitTime && (NvSciError_Timeout == err)) { + return 0; + } + + /* Otherwise, any error is considered fatal. A timeout probably + * indicates a failure to connect and complete setup in a timely + * fashion, so we specifically call out this case. + */ + if (NvSciError_Timeout == err) { + printf("%s timed out waiting for setup instructions\n", + blockData->name); + } else { + printf("%s event query failed with error %x\n", + blockData->name, err); + } + blockData->deleteFunc(blockData); + return -1; + } + + /* If we received an event, handle it based on its type */ + int32_t rv = 1; + NvSciError status; + switch (event) { + + /* + * Any event we don't explicitly handle is a fatal error + */ + default: + printf("%s received unknown event %x\n", + blockData->name, event); + + rv = -1; + break; + + /* + * Error events should never occur with safety-certified drivers, + * and are provided only in non-safety builds for debugging + * purposes. Even then, they should only occur when something + * fundamental goes wrong, like the system running out of memory, + * or stack/heap corruption, or a bug in NvSci which should be + * reported to NVIDIA. + */ + case NvSciStreamEventType_Error: + err = NvSciStreamBlockErrorGet(blockData->block, &status); + if (NvSciError_Success != err) { + printf("%s Failed to query the error event code %x\n", + blockData->name, err); + rv = -1; + } else { + if ((opts.numLateConsumer > 0U) && + (status == NvSciError_StreamNotConnected)) { + printf("[WARN] %s received error event: %x\n", + blockData->name, status); + rv = 2; + } else { + printf("%s received error event: %x\n", + blockData->name, status); + rv = -1; + } + } + break; + + /* + * If told to disconnect, it means either the stream finished its + * business or some other block had a failure. We'll just do a + * clean up and return without an error. + */ + case NvSciStreamEventType_Disconnected: + rv = 2; + break; + + /* + * The block doesn't have to do anything on connection, but now we may + * wait forever for any further events, so the timeout becomes infinite. + */ + case NvSciStreamEventType_Connected: + /* Query producer and consumer(s) endpoint info if needed */ + blockData->waitTime = -1; + break; + + /* All setup complete. Transition to runtime phase */ + case NvSciStreamEventType_SetupComplete: + if (opts.numLateConsumer > 0U) { + /* Check if it is multicast block */ + if (!strcmp(blockData->name, "Multicast")) { + /* Wakeup the thread to handle the next set + * of late-/re-attach consumer connections + */ + pthread_cond_signal(&cond); + if (!threadStarted) { + threadStarted = true; + /* Spawn a thread to handle the late attach connections */ + int32_t status = pthread_create(&dispatchThread, + NULL, + handleLateConsumerThreadFunc, + NULL); + if (status != 0) { + printf("Failed to spawn thread to monitor late consumer connections\n"); + /* Abort the process as this thread is important + * to process the late-/re-attach consumer connections. 
+ * Failed to create this thread makes the late/re-attach usecase + * unusable. + */ + abort(); + } + } + } + } + break; + } + + /* On failure or final event, clean up the block */ + if ((rv < 0) || (1 < rv)) { + blockData->deleteFunc(blockData); + } + + return rv; +} + +/* Create and register a new common block */ +BlockData* createCommon( + char const* name, + size_t size) +{ + /* If no size specified, just use BlockData */ + if (0 == size) { + size = sizeof(BlockData); + } + + /* Create a data structure to track the block's status */ + BlockData* commonData = (BlockData*)calloc(1, size); + if (NULL == commonData) { + printf("Failed to allocate data structure for %s\n", name); + return NULL; + } + + /* Save the name for debugging purposes */ + strcpy(commonData->name, name); + + /* Wait time for initial connection event will be 60 seconds */ + commonData->waitTime = 60 * 1000000; + + /* Use the common delete function */ + commonData->deleteFunc = deleteCommon; + + return commonData; +} diff --git a/event_sample_app/block_consumer_uc1.c b/event_sample_app/block_consumer_uc1.c new file mode 100644 index 0000000..0abf37b --- /dev/null +++ b/event_sample_app/block_consumer_uc1.c @@ -0,0 +1,1200 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. 
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - consumer block for use case 1
+ *
+ * This implements the consumer for use case 1: cuda to cuda streaming
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "nvscibuf.h"
+#include "nvscisync.h"
+#include "nvscistream.h"
+#include "cuda.h"
+#include "cuda_runtime_api.h"
+#include "block_info.h"
+#include "event_loop.h"
+#include "usecase1.h"
+#include "util.h"
+
+/* Internal data structure used to track packets */
+typedef struct {
+    /* The packet handle used for NvSciStream functions */
+    NvSciStreamPacket handle;
+    /* NvSci buffer object for the packet's data buffer */
+    NvSciBufObj dataObj;
+    /* CUDA external memory handle for the data buffer */
+    cudaExternalMemory_t dataExtMem;
+    /* CUDA device memory pointer for the data buffer */
+    void* dataDevMem;
+    /* Local system memory buffer used as the target for CUDA operations */
+    uint8_t* dataDstMem;
+    /* NvSci buffer object for the packet's CRC buffer */
+    NvSciBufObj crcObj;
+    /* Virtual address for the CRC buffer */
+    uint8_t const* crcPtr;
+} ConsPacket;
+
+/* Internal data used by the consumer block */
+typedef struct {
+    /* Common block info */
+    BlockData common;
+    /* CUDA device ID and UUID */
+    int32_t cudaDeviceId;
+    CUuuid cudaUuid;
+
+    /* CUDA consumer stream */
+    cudaStream_t cudaStream;
+
+    /* CUDA sync attributes required for signaling */
+    NvSciSyncAttrList signalAttr;
+    /* CUDA sync attributes required for waiting */
+    NvSciSyncAttrList waiterAttr;
+    /* Sync object for CUDA to signal after processing data */
+    NvSciSyncObj signalObj;
+    /* CUDA semaphore mapped to sync object */
+    cudaExternalSemaphore_t signalSem;
+
+    /* Sync object to wait for before processing data */
+    NvSciSyncObj waiterObj;
+    /* CUDA semaphore mapped to sync object */
+    cudaExternalSemaphore_t waiterSem;
+
+    /* Element index chosen by pool for the CRC buffer */
+    uint32_t crcIndex;
+    /* Element index chosen by pool for the data buffer */
+    uint32_t dataIndex;
+    /* Size for data buffer after reconciling all requirements */
+    uint64_t dataSize;
+    /* Number of packets provided by pool */
+    uint32_t numPacket;
+    /* Information about each packet */
+    ConsPacket packets[MAX_PACKETS];
+
+    /* Number of payloads processed so far */
+    uint32_t counter;
+    /* Number of frames that the consumer is interested in receiving.
+     * This is mainly used for the late-/re-attach usecase, where a consumer
+     * can decide to receive a certain number of frames before disconnecting.
+     */
+    uint32_t frames;
+
+    /* Flag indicating consumer has finished receiving desired number of payloads */
+    bool finished;
+} ConsData;
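+
+/*
+ * Note on packet cookies (illustrative; matches the code below): the
+ * consumer reports a cookie for each packet to the pool, and every
+ * later event referring to that packet hands the cookie back. This app
+ * simply uses the ConsPacket pointer itself as the cookie:
+ *
+ *   NvSciStreamBlockPacketStatusSet(block, handle,
+ *                                   (NvSciStreamCookie)packet,
+ *                                   NvSciError_Success);
+ *   ...
+ *   ConsPacket* packet = (ConsPacket*)cookie;
+ */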
+
+/* Free up the packet resources */
+static void deletePacket(
+    ConsPacket* packet)
+{
+    if (packet != NULL) {
+        if (packet->handle != NvSciStreamPacket_Invalid) {
+            /* Free CUDA memory mapping */
+            (void)cudaFree(packet->dataDevMem);
+            if (packet->dataExtMem) {
+                (void)cudaDestroyExternalMemory(packet->dataExtMem);
+                packet->dataExtMem = 0;
+            }
+
+            if (packet->dataDstMem) {
+                free(packet->dataDstMem);
+                packet->dataDstMem = NULL;
+            }
+
+            /* Free buffer objects */
+            if (packet->dataObj) {
+                NvSciBufObjFree(packet->dataObj);
+                packet->dataObj = NULL;
+            }
+
+            if (packet->crcObj) {
+                NvSciBufObjFree(packet->crcObj);
+                packet->crcObj = NULL;
+            }
+        }
+
+        /* Clear out packet information */
+        memset(packet, 0, sizeof(ConsPacket));
+    }
+}
+
+/* Free up consumer block resources */
+static void deleteConsumer(
+    ConsData* consData)
+{
+    /* Destroy block */
+    if (consData->common.block != 0) {
+        (void)NvSciStreamBlockDelete(consData->common.block);
+        consData->common.block = 0;
+    }
+
+    /* Free the packet resources */
+    for (uint32_t i=0; i<consData->numPacket; i++) {
+        deletePacket(&consData->packets[i]);
+    }
+
+    /* Free the sync objects */
+    if (consData->waiterObj != NULL) {
+        (void)cudaDestroyExternalSemaphore(consData->waiterSem);
+        consData->waiterSem = 0;
+        NvSciSyncObjFree(consData->waiterObj);
+        consData->waiterObj = NULL;
+    }
+
+    if (consData->signalObj != NULL) {
+        (void)cudaDestroyExternalSemaphore(consData->signalSem);
+        consData->signalSem = 0;
+        NvSciSyncObjFree(consData->signalObj);
+        consData->signalObj = NULL;
+    }
+
+    /* Destroy CUDA stream */
+    (void)cudaStreamDestroy(consData->cudaStream);
+
+    /* Free data */
+    free(consData);
+}
+
+/* Handle query of basic stream info */
+static int32_t handleStreamInit(
+    ConsData* consData)
+{
+    if (opts.endInfo) {
+        /* Query endpoint info from producer */
+        uint32_t size = INFO_SIZE;
+        char info[INFO_SIZE] = {0};
+        NvSciError err = NvSciStreamBlockUserInfoGet(
+                            consData->common.block,
+                            NvSciStreamBlockType_Producer, 0U,
+                            ENDINFO_NAME_PROC,
+                            &size, &info);
+        if (NvSciError_Success == err) {
+            printf("Producer info: %s\n", info);
+        } else if (NvSciError_StreamInfoNotProvided == err) {
+            printf("Info not provided by the producer\n");
+        } else {
+            printf("Failed (%x) to query the producer info\n", err);
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Handle initialization of CUDA resources for consumer */
+static int32_t handleConsumerInit(
+    ConsData* consData)
+{
+    int32_t cudaRtErr;
+    CUresult cudaErr;
+
+    /* Get stack limit */
+    size_t unused;
+    cudaRtErr = cudaDeviceGetLimit(&unused, cudaLimitStackSize);
+    if (cudaSuccess != cudaRtErr) {
+        printf("Failed (%d) to get CUDA device limit\n", cudaRtErr);
+        return 0;
+    }
+
+    /* Set CUDA device */
+    consData->cudaDeviceId = 0;
+    cudaRtErr = cudaSetDevice(consData->cudaDeviceId);
+    if (cudaSuccess != cudaRtErr) {
+        printf("Failed (%d) to set CUDA device\n", cudaRtErr);
+        return 0;
+    }
+
+    /* Get UUID for CUDA device */
+    cudaErr = cuDeviceGetUuid(&consData->cudaUuid, consData->cudaDeviceId);
+    if (CUDA_SUCCESS != cudaErr) {
+        printf("Failed (%d) to get CUDA UUID\n", cudaErr);
+        return 0;
+    }
+
+    /* Get CUDA streams to be used for asynchronous operation */
+    cudaRtErr = cudaStreamCreateWithFlags(&consData->cudaStream,
+                                          cudaStreamNonBlocking);
+    if (cudaSuccess != cudaRtErr) {
+        printf("Failed (%d) to create CUDA stream\n", cudaRtErr);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
Handle setup of supported buffer attributes */ +static int32_t handleConsumerElemSupport( + ConsData* consData) +{ + /* + * Note: To illustrate that NvSciStream producer and consumer do + * not need to specify the same set of element types, or use + * the same order for element types, the producer for this + * use case sends the CRC attributes first, followed by the + * primary data, while the consumer uses the opposite order. + * Our pool implementation will end up using the producer + * ordering, but that is not required either. + */ + + NvSciError sciErr; + uint32_t bufName[2]; + NvSciBufAttrList bufAttrs[2]; + + /* + * Data buffer requires read access by CPU and the GPU of the cuda + * device, and uses a raw data buffer. (Size is specified by producer.) + */ + NvSciBufAttrValAccessPerm dataPerm = NvSciBufAccessPerm_Readonly; + uint8_t dataCpu = 1U; + NvSciRmGpuId dataGpu = { 0 }; + NvSciBufType dataBufType = NvSciBufType_RawBuffer; + memcpy(&dataGpu.bytes, &consData->cudaUuid.bytes, sizeof(dataGpu.bytes)); + NvSciBufAttrKeyValuePair dataKeyVals[] = { + { NvSciBufGeneralAttrKey_GpuId, &dataGpu, sizeof(dataGpu) }, + { NvSciBufGeneralAttrKey_Types, &dataBufType, sizeof(dataBufType) }, + { NvSciBufGeneralAttrKey_RequiredPerm, &dataPerm, sizeof(dataPerm) }, + { NvSciBufGeneralAttrKey_NeedCpuAccess, &dataCpu, sizeof(dataCpu) } + }; + + /* Create and fill attribute list for data buffer */ + bufName[0] = ELEMENT_NAME_DATA; + sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[0]); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to create data attribute list\n", sciErr); + return 0; + } + sciErr = NvSciBufAttrListSetAttrs(bufAttrs[0], + dataKeyVals, + sizeof(dataKeyVals) / + sizeof(NvSciBufAttrKeyValuePair)); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to fill data attribute list\n", sciErr); + return 0; + } + + /* + * CRC buffer requires read access by CPU, and uses a raw 64 byte + * data buffer with 1 byte alignment. 
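+     * The producer writes its checksum here; after copying the data
+     * element, the consumer computes its own checksum and compares the
+     * two (see handleConsumerPayload below).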
+ */ + NvSciBufAttrValAccessPerm crcPerm = NvSciBufAccessPerm_Readonly; + uint8_t crcCpu = 1U; + NvSciBufType crcBufType = NvSciBufType_RawBuffer; + uint64_t crcSize = 64U; + uint64_t crcAlign = 1U; + NvSciBufAttrKeyValuePair crcKeyVals[] = { + { NvSciBufGeneralAttrKey_Types, &crcBufType, sizeof(crcBufType) }, + { NvSciBufRawBufferAttrKey_Size, &crcSize, sizeof(crcSize) }, + { NvSciBufRawBufferAttrKey_Align, &crcAlign, sizeof(crcAlign) }, + { NvSciBufGeneralAttrKey_RequiredPerm, &crcPerm, sizeof(crcPerm) }, + { NvSciBufGeneralAttrKey_NeedCpuAccess, &crcCpu, sizeof(crcCpu) } + }; + + /* Create and fill attribute list for CRC checksum buffer */ + bufName[1] = ELEMENT_NAME_CRC; + sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[1]); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to create CRC attribute list\n", sciErr); + return 0; + } + sciErr = NvSciBufAttrListSetAttrs(bufAttrs[1], + crcKeyVals, + sizeof(crcKeyVals) / + sizeof(NvSciBufAttrKeyValuePair)); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to fill CRC attribute list\n", sciErr); + return 0; + } + + /* + * Inform stream of the attributes + * Once sent, the attribute lists are no longer needed + */ + for (uint32_t i=0; i<2U; ++i) { + sciErr = NvSciStreamBlockElementAttrSet(consData->common.block, + bufName[i], bufAttrs[i]); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to send element %d attribute\n", + sciErr, i); + return 0; + } + NvSciBufAttrListFree(bufAttrs[i]); + } + + /* Indicate that all element information has been exported */ + sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_ElementExport, + true); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to complete element export\n", sciErr); + return 0; + } + + return 1; +} + +/* Handle receipt of chosen element attributes */ +static int32_t handleConsumerElemSetting( + ConsData* consData) +{ + NvSciError err; + + /* + * This application does not need to query the element count, because we + * know it is always 2. But we do so anyways to show how it is done. + */ + uint32_t count; + err = NvSciStreamBlockElementCountGet(consData->common.block, + NvSciStreamBlockType_Pool, + &count); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to query element count\n", err); + return 0; + } + if (2U != count) { + printf("Consumer received unexpected element count (%d)\n", count); + return 0; + } + + /* Process all elements */ + for (uint32_t i=0U; i<2U; ++i) { + + /* Query element type and attributes */ + uint32_t type; + NvSciBufAttrList bufAttr; + err = NvSciStreamBlockElementAttrGet(consData->common.block, + NvSciStreamBlockType_Pool, i, + &type, &bufAttr); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to query element attr %d\n", err, i); + return 0; + } + + /* For data element, need to extract size and save index */ + if (ELEMENT_NAME_DATA == type) { + consData->dataIndex = i; + NvSciBufAttrKeyValuePair keyVals[] = { + { NvSciBufRawBufferAttrKey_Size, NULL, 0 } + }; + err = NvSciBufAttrListGetAttrs(bufAttr, keyVals, 1); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to obtain buffer size\n", err); + return 0; + } + consData->dataSize = *((const uint64_t*)(keyVals[0].value)); + + /* Set waiter attributes for the asynchronous element. 
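+             * The CRC element is synchronous and keeps the default NULL
+             * waiter attribute, so only the data element will carry fences.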
*/ + err = NvSciStreamBlockElementWaiterAttrSet(consData->common.block, + i, + consData->waiterAttr); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to send waiter attr for elem %d\n", + err, i); + return 0; + } + + /* Once sent, the waiting attributes are no longer needed */ + NvSciSyncAttrListFree(consData->waiterAttr); + consData->waiterAttr = NULL; + } + + /* For CRC element, just need to save the index */ + else if (ELEMENT_NAME_CRC == type) { + consData->crcIndex = i; + + /* CRC element is a synchronous element. + * No need to set the waiter attr, which is NULL by default. */ + } + + /* Report any unknown element */ + else { + printf("Consumer received unknown element type (%x)\n", type); + return 0; + } + + /* Don't need to keep attribute list */ + NvSciBufAttrListFree(bufAttr); + + /* + * Indicate element will be used. + * This is the default, and we can omit this call in most applications, + * but we illustrate its use for applications that only use some + * of the buffers. + */ + err = NvSciStreamBlockElementUsageSet(consData->common.block, i, true); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to indicate element %d is used\n", + err, i); + return 0; + } + } + + /* Indicate that element import is complete */ + err = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_ElementImport, + true); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to complete element import\n", err); + return 0; + } + + /* Indicate that waiter attribute export is done. */ + err = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_WaiterAttrExport, + true); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to complete waiter attr export\n", err); + return 0; + } + + return 1; +} + +/* Handle creation of a new packet */ +static int32_t handleConsumerPacketCreate( + ConsData* consData) +{ + NvSciError err; + + /* Retrieve handle for packet pending creation */ + NvSciStreamPacket handle; + err = NvSciStreamBlockPacketNewHandleGet(consData->common.block, + &handle); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to retrieve handle for the new packet\n", + err); + return 0; + } + + /* Make sure there is room for more packets */ + if (MAX_PACKETS <= consData->numPacket) { + printf("Consumer exceeded max packets\n"); + err = NvSciStreamBlockPacketStatusSet(consData->common.block, + handle, + NvSciStreamCookie_Invalid, + NvSciError_Overflow); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to send packet status\n", err); + } + return 0; + } + + /* + * Allocate the next entry in the array for the new packet. + * Use the array entry for the cookie + */ + ConsPacket* packet = &consData->packets[consData->numPacket++]; + packet->handle = handle; + + /* Retrieve all buffers and map into application + * Consumers can skip querying elements that they don't use. + * This use case has 2 elements. 
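+     * Each NvSciBufObj received here is owned by this application and is
+     * freed in deletePacket() when the packet is deleted.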
+ */ + for (uint32_t index = 0U; index < 2U; index++) { + NvSciBufObj bufObj; + err = NvSciStreamBlockPacketBufferGet(consData->common.block, + handle, + index, + &bufObj); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to retrieve buffer (%lx/%d)\n", + err, handle, index); + return 0; + } + + /* Handle mapping of data buffer */ + NvSciError sciErr; + int32_t cudaRtErr; + + if (index == consData->dataIndex) { + + /* Save buffer object */ + packet->dataObj = bufObj; + + /* Map in the buffer as CUDA external memory */ + struct cudaExternalMemoryHandleDesc memHandleDesc; + memset(&memHandleDesc, 0, sizeof(memHandleDesc)); + memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; + memHandleDesc.handle.nvSciBufObject = bufObj; + memHandleDesc.size = consData->dataSize; + cudaRtErr = cudaImportExternalMemory(&packet->dataExtMem, + &memHandleDesc); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to map buffer as external mem\n", + cudaRtErr); + return 0; + } + + /* Map in the buffer as CUDA device memory */ + struct cudaExternalMemoryBufferDesc memBufferDesc; + memset(&memBufferDesc, 0, sizeof(memBufferDesc)); + memBufferDesc.size = consData->dataSize; + memBufferDesc.offset = 0; + cudaRtErr = cudaExternalMemoryGetMappedBuffer(&packet->dataDevMem, + packet->dataExtMem, + &memBufferDesc); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to map buffer as device mem\n", + cudaRtErr); + return 0; + } + + /* Allocate normal memory to use as the target for the CUDA op */ + packet->dataDstMem = (uint8_t*)malloc(consData->dataSize); + if (NULL == packet->dataDstMem) { + printf("Consumer failed to allocate target buffer\n"); + return 0; + } + + /* Fill in with initial values */ + memset(packet->dataDstMem, 0xD0, consData->dataSize); + + } + + /* Handle mapping of CRC buffer */ + else if (index == consData->crcIndex) { + + /* Save buffer object */ + packet->crcObj = bufObj; + + /* Get a CPU pointer for the buffer from NvSci */ + sciErr = NvSciBufObjGetConstCpuPtr(bufObj, + (void const**)&packet->crcPtr); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to map CRC buffer\n", sciErr); + return 0; + } + + } + + /* Shouldn't be any other index */ + else { + printf("Consumer received buffer for unknown element (%d)\n", + index); + return 0; + } + } + + /* Inform pool of success. + * Note: Could inform the pool of any of the failures above. + */ + err = NvSciStreamBlockPacketStatusSet(consData->common.block, + handle, + (NvSciStreamCookie)packet, + NvSciError_Success); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to inform pool of packet status\n", err); + return 0; + } + + return 1; +} + +/* Handle deletion of packet */ +static void handleConsumerPacketDelete( + ConsData* consData) +{ + /* Get the deleted packet cookie*/ + NvSciStreamCookie cookie; + NvSciError err = + NvSciStreamBlockPacketOldCookieGet(consData->common.block, + &cookie); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to retrieve the deleted packet cookie\n", + err); + } + + /* Get packet pointer */ + ConsPacket* packet = (ConsPacket*)cookie; + + /* Free up the packet resources */ + deletePacket(packet); +} + +/* Handle setup of supported sync attributes */ +static int32_t handleConsumerSyncSupport( + ConsData* consData) +{ + NvSciError sciErr; + int32_t cudaRtErr; + + /* + * Create sync attribute list for signaling. 
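+     * (CUDA fills in the actual attribute values via
+     * cudaDeviceGetNvSciSyncAttributes below.)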
+ * This will be saved until we receive the producer's attributes + */ + sciErr = NvSciSyncAttrListCreate(sciSyncModule, &consData->signalAttr); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to allocate signal sync attrs\n", sciErr); + return 0; + } + + /* Have CUDA fill the signaling attribute list */ + cudaRtErr = cudaDeviceGetNvSciSyncAttributes(consData->signalAttr, + consData->cudaDeviceId, + cudaNvSciSyncAttrSignal); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to fill signal sync attrs\n", cudaRtErr); + return 0; + } + + /* Create sync attribute list for waiting. */ + sciErr = NvSciSyncAttrListCreate(sciSyncModule, &consData->waiterAttr); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to allocate waiter sync attrs\n", sciErr); + return 0; + } + + /* Have CUDA fill the waiting attribute list */ + cudaRtErr = cudaDeviceGetNvSciSyncAttributes(consData->waiterAttr, + consData->cudaDeviceId, + cudaNvSciSyncAttrWait); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to fill waiter sync attrs\n", cudaRtErr); + return 0; + } + + return 1; +} + +/* Handle creation and export of consumer sync object */ +static int32_t handleConsumerSyncExport( + ConsData* consData) +{ + NvSciError sciErr; + uint32_t cudaRtErr; + + /* Process waiter attrs from all elements. + * As CRC element is a synchronous element, + * no need to query the sync object for it. + */ + NvSciSyncAttrList waiterAttr = NULL; + sciErr = NvSciStreamBlockElementWaiterAttrGet(consData->common.block, + consData->dataIndex, + &waiterAttr); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to query waiter attr\n", + sciErr); + return 0; + } + if (NULL == waiterAttr) { + printf("Consumer received NULL waiter attr for data elem\n"); + return 0; + } + + /* Indicate that waiter attribute import is done. */ + sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_WaiterAttrImport, + true); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to complete waiter attr import\n", + sciErr); + return 0; + } + + /* + * Merge and reconcile producer sync attrs with ours. + */ + NvSciSyncAttrList unreconciled[2] = { + consData->signalAttr, + waiterAttr }; + NvSciSyncAttrList reconciled = NULL; + NvSciSyncAttrList conflicts = NULL; + sciErr = NvSciSyncAttrListReconcile(unreconciled, 2, + &reconciled, &conflicts); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to reconcile sync attributes\n", sciErr); + return 0; + } + + /* Allocate sync object */ + sciErr = NvSciSyncObjAlloc(reconciled, &consData->signalObj); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to allocate sync object\n", sciErr); + return 0; + } + + /* Free the attribute lists */ + NvSciSyncAttrListFree(consData->signalAttr); + consData->signalAttr = NULL; + NvSciSyncAttrListFree(waiterAttr); + NvSciSyncAttrListFree(reconciled); + + /* Create CUDA semaphore for sync object */ + struct cudaExternalSemaphoreHandleDesc extSemDesc; + memset(&extSemDesc, 0, sizeof(extSemDesc)); + extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; + extSemDesc.handle.nvSciSyncObj = consData->signalObj; + cudaRtErr = cudaImportExternalSemaphore(&consData->signalSem, + &extSemDesc); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to map signal object to semaphore\n", + cudaRtErr); + return 0; + } + + /* Only send the sync object for the asynchronous element. 
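+     * Here that is the data element, identified by the dataIndex saved
+     * during element import.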
+ * If this function is not called for an element, + * the sync object is assumed to be NULL. + * In this use case, CRC element doesn't use sync object. + */ + sciErr = NvSciStreamBlockElementSignalObjSet(consData->common.block, + consData->dataIndex, + consData->signalObj); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to send sync object\n", sciErr); + return 0; + } + + /* Indicate that sync object export is complete */ + sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_SignalObjExport, + true); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to complete signal obj export\n", + sciErr); + return 0; + } + + return 1; +} + +/* Handle import of producer sync object */ +static int32_t handleConsumerSyncImport( + ConsData* consData) +{ + uint32_t cudaRtErr; + NvSciError sciErr; + NvSciSyncObj waiterObj = NULL; + /* Query sync object for asynchronous elements. */ + sciErr = NvSciStreamBlockElementSignalObjGet(consData->common.block, + 0U, consData->dataIndex, + &waiterObj); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to query sync object\n", sciErr); + return 0; + } + + /* Save object */ + consData->waiterObj = waiterObj; + + /* If the waiter sync obj is NULL, + * it means this element is ready to use when received. + */ + if (NULL != waiterObj) { + /* Create CUDA semaphore for sync object */ + struct cudaExternalSemaphoreHandleDesc extSemDesc; + memset(&extSemDesc, 0, sizeof(extSemDesc)); + extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; + extSemDesc.handle.nvSciSyncObj = waiterObj; + cudaRtErr = cudaImportExternalSemaphore(&consData->waiterSem, + &extSemDesc); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to map waiter object to semaphore\n", + cudaRtErr); + return 0; + } + } + + /* Indicate that element import is complete */ + sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_SignalObjImport, + true); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to complete signal obj import\n", + sciErr); + return 0; + } + + return 1; +} + +/* Handle processing of payloads */ +static int32_t handleConsumerPayload( + ConsData* consData) +{ + NvSciError sciErr; + int32_t cudaRtErr; + + /* Obtain packet with the new payload */ + NvSciStreamCookie cookie; + sciErr = NvSciStreamConsumerPacketAcquire(consData->common.block, + &cookie); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to obtain packet for payload\n", sciErr); + return 0; + } + ConsPacket* packet = (ConsPacket*)cookie; + consData->counter++; + + /* Extract CRC value from the packet */ + uint32_t crc = *((uint32_t const*)(packet->crcPtr)); + + /* If the received waiter obj if NULL, + * the producer is done writing data into this element, + * skip waiting on pre-fence. 
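+     * (A consumer that processed data on the CPU rather than with CUDA
+     * would wait on the fence directly instead; a minimal sketch,
+     * assuming a CPU wait context was allocated during setup as the
+     * producer in this app does:
+     *
+     *     NvSciSyncFenceWait(&prefence, cpuWaitContext, -1);
+     *     NvSciSyncFenceClear(&prefence);
+     *
+     * This consumer maps the fence to a CUDA semaphore below.)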
+ */ + if (NULL != consData->waiterObj) { + + /* Query fences from producer for data element */ + NvSciSyncFence prefence = NvSciSyncFenceInitializer; + sciErr = NvSciStreamBlockPacketFenceGet( + consData->common.block, + packet->handle, + 0U, consData->dataIndex, + &prefence); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to query fence from producer\n", + sciErr); + return 0; + } + + /* Instruct CUDA to wait for the producer fence */ + struct cudaExternalSemaphoreWaitParams waitParams; + memset(&waitParams, 0, sizeof(waitParams)); + waitParams.params.nvSciSync.fence = &prefence; + waitParams.flags = 0; + cudaRtErr = cudaWaitExternalSemaphoresAsync(&consData->waiterSem, + &waitParams, 1, + consData->cudaStream); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to wait for prefence\n", cudaRtErr); + return 0; + } + NvSciSyncFenceClear(&prefence); + } + + /* Instruct CUDA to copy the packet data buffer to the target buffer */ + cudaRtErr = cudaMemcpy2DAsync(packet->dataDstMem, + consData->dataSize, + packet->dataDevMem, + consData->dataSize, + consData->dataSize, + 1, + cudaMemcpyDeviceToHost, + consData->cudaStream); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to issue copy command\n", cudaRtErr); + return 0; + } + + /* Inform CUDA to signal a fence when the copy completes */ + NvSciSyncFence postfence = NvSciSyncFenceInitializer; + struct cudaExternalSemaphoreSignalParams signalParams; + memset(&signalParams, 0, sizeof(signalParams)); + signalParams.params.nvSciSync.fence = &postfence; + signalParams.flags = 0; + cudaRtErr = cudaSignalExternalSemaphoresAsync(&consData->signalSem, + &signalParams, + 1, + consData->cudaStream); + if (cudaSuccess != cudaRtErr) { + printf("Consumer failed (%d) to signal postfence\n", cudaRtErr); + return 0; + } + + /* Update postfence for this element */ + sciErr = NvSciStreamBlockPacketFenceSet(consData->common.block, + packet->handle, + consData->dataIndex, + &postfence); + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to set fence\n", sciErr); + return 0; + } + + /* Release the packet back to the producer */ + sciErr = NvSciStreamConsumerPacketRelease(consData->common.block, + packet->handle); + + if (NvSciError_Success != sciErr) { + printf("Consumer failed (%x) to release packet\n", sciErr); + return 0; + } + NvSciSyncFenceClear(&postfence); + + /* + * Wait for the copy to finish, generate a checkum and, compare + * with the value from the packet + */ + cudaStreamSynchronize(consData->cudaStream); + if (crc != generateCRC(packet->dataDstMem, + 1, + consData->dataSize, + consData->dataSize)) { + printf("Consumer CRC does not match the one from producer\n"); + } + + if (consData->frames == consData->counter) { + consData->finished = 1; + } + return 1; +} + +/* Handle events on a consumer block + * + * The consumer block informs the stream of the consumers buffer and + * synchronization requirements and capabilities, creates signaling + * synchronization objects and receives synchronization objects to + * wait for, maps buffers and synchronization objects to the consumer + * engine(s), and processes data. + */ +static int32_t handleConsumer( + void* data, + uint32_t wait) +{ + /* Cast to consumer data */ + ConsData* consData = (ConsData*)data; + + /* Get time to wait */ + int64_t waitTime = wait ? 
consData->common.waitTime : 0; + + /* Query/wait for an event on the block */ + NvSciStreamEventType event; + NvSciError err; + err = NvSciStreamBlockEventQuery(consData->common.block, waitTime, &event); + + /* Handle errors */ + if (NvSciError_Success != err) { + + /* If not asked to wait, a timeout is not an error */ + if (!waitTime && (NvSciError_Timeout == err)) { + return 0; + } + + /* Otherwise, any error is considered fatal. A timeout probably + * indicates a failure to connect and complete setup in a timely + * fashion, so we specifically call out this case. + */ + if (NvSciError_Timeout == err) { + printf("Consumer timed out waiting for setup instructions\n"); + } else { + printf("Consumer event query failed with error %x\n", err); + } + deleteConsumer(consData); + return -1; + } + + /* If we received an event, handle it based on its type + * + * Note that there's a lot of error checking we could choose to do for + * some of these events, like making sure that we only receive each + * event once for a given entry. But NvSciStream is expected to take + * care of all of that, even when the application makes a mistake. + * So we only check for things that don't trigger NvSciStream errors. + */ + int32_t rv = 1; + NvSciError status; + switch (event) { + + /* + * Any event we don't explicitly handle is a fatal error + */ + default: + printf("Consumer received unknown event %x\n", event); + + rv = -1; + break; + + /* + * Error events should never occur with safety-certified drivers, + * and are provided only in non-safety builds for debugging + * purposes. Even then, they should only occur when something + * fundamental goes wrong, like the system running out of memory, + * or stack/heap corruption, or a bug in NvSci which should be + * reported to NVIDIA. + */ + case NvSciStreamEventType_Error: + err = NvSciStreamBlockErrorGet(consData->common.block, &status); + if (NvSciError_Success != err) { + printf("%s Failed to query the error event code %x\n", + consData->common.name, err); + } else { + printf("%s received error event: %x\n", + consData->common.name, status); + } + + rv = -1; + break; + + /* + * If told to disconnect, it means either the stream finished its + * business or some other block had a failure. We'll just do a + * clean up and return without an error. + */ + case NvSciStreamEventType_Disconnected: + printf("Consumer disconnected after receiving %d payloads\n", + consData->counter); + rv = 2; + break; + + /* + * On connection, the consumer should initialize the appopriate engine(s) + * and obtain the necessary buffer and synchronization attribute lists + * for the desired use case. 
+ */ + case NvSciStreamEventType_Connected: + /* Initialize basic stream info */ + if (!handleStreamInit(consData)) { + rv = -1; + } + /* Initialize CUDA access */ + if (!handleConsumerInit(consData)) { + rv = -1; + } + /* Determine supported buffer attributes */ + else if (!handleConsumerElemSupport(consData)) { + rv = -1; + } + /* Determined supported sync attributes */ + else if (!handleConsumerSyncSupport(consData)) { + rv = -1; + } + + /* Now that we're fully connected, set the wait time to infinite */ + consData->common.waitTime = -1; + break; + + /* Retrieve all element information from pool */ + case NvSciStreamEventType_Elements: + if (!handleConsumerElemSetting(consData)) { + rv = -1; + } + break; + + /* For a packet, set up an entry in the array */ + case NvSciStreamEventType_PacketCreate: + if (!handleConsumerPacketCreate(consData)) { + rv = -1; + } + break; + + /* Finish any setup related to packet resources */ + case NvSciStreamEventType_PacketsComplete: + /* For this use case, nothing else to setup. + * Inform the NvSciStream that the consumer has imported all packets. + */ + err = NvSciStreamBlockSetupStatusSet(consData->common.block, + NvSciStreamSetup_PacketImport, + true); + if (NvSciError_Success != err) { + printf("Consumer failed (%x) to complete packet import\n", err); + rv = -1; + } + break; + + /* Delete a packet - usually only relevant for non-safety applications */ + case NvSciStreamEventType_PacketDelete: + handleConsumerPacketDelete(consData); + break; + + case NvSciStreamEventType_WaiterAttr: + if (!handleConsumerSyncExport(consData)) { + rv = -1; + } + break; + + /* Import producer sync objects for all elements */ + case NvSciStreamEventType_SignalObj: + if (!handleConsumerSyncImport(consData)) { + rv = -1; + } + break; + + /* All setup complete. Transition to runtime phase */ + case NvSciStreamEventType_SetupComplete: + printf("Consumer setup completed\n"); + break; + + /* Processs payloads when packets arrive */ + case NvSciStreamEventType_PacketReady: + if (!handleConsumerPayload(consData)) { + rv = -1; + } else if (consData->finished) { + printf("Consumer finished receiving %d payloads\n", consData->counter); + rv = 2; + } + break; + } + + /* On failure or final event, clean up the block */ + if ((rv < 0) || (1 < rv)) { + deleteConsumer(consData); + } + + return rv; +} + +/* Create and register a new consumer block */ +int32_t createConsumer_Usecase1( + NvSciStreamBlock* consumer, + NvSciStreamBlock pool, + uint32_t index, + uint32_t frames) +{ + /* + * The index is ignored. It is provided to support use cases where + * there are multiple consumers that don't all do the same thing. + */ + (void)index; + + /* Create a data structure to track the block's status */ + ConsData* consData = (ConsData*)calloc(1, sizeof(ConsData)); + if (NULL == consData) { + printf("Failed to allocate data structure for consumer\n"); + return 0; + } + + /* Save the name for debugging purposes */ + strcpy(consData->common.name, "Consumer"); + + /* Wait time for initial connection event will be 60 seconds */ + consData->common.waitTime = 60 * 1000000; + consData->frames = frames; + + /* Create a pool block */ + NvSciError err = + NvSciStreamConsumerCreate(pool, &consData->common.block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create consumer block\n", err); + deleteConsumer(consData); + return 0; + } + + if (opts.endInfo) { + /* Add endpoint information on consumer side. 
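+         * (The peer can query this info after connection; a sketch of
+         * the query side, as the producer in this app does in its
+         * handleStreamInit():
+         *
+         *     uint32_t size = INFO_SIZE;
+         *     char info[INFO_SIZE] = {0};
+         *     NvSciStreamBlockUserInfoGet(block,
+         *                                 NvSciStreamBlockType_Consumer,
+         *                                 0U, ENDINFO_NAME_PROC,
+         *                                 &size, info);)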
+ * Application can specify user-defined info to help set up stream, + * which can be queried by other blocks after stream connection. + */ + char info[INFO_SIZE] = {0}; + size_t infoSize = + snprintf(info, INFO_SIZE, "%s%d", "Consumer proc: ", getpid()); + err = NvSciStreamBlockUserInfoSet(consData->common.block, + ENDINFO_NAME_PROC, + infoSize, info); + if (NvSciError_Success != err) { + printf("Failed (%x) to setup the consumer info\n", err); + deleteConsumer(consData); + return 0; + } + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(consData->common.block, consData, handleConsumer)) { + deleteConsumer(consData); + return 0; + } + + *consumer = consData->common.block; + return 1; +} diff --git a/event_sample_app/block_info.h b/event_sample_app/block_info.h new file mode 100644 index 0000000..323725a --- /dev/null +++ b/event_sample_app/block_info.h @@ -0,0 +1,284 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - block abstraction + */ + +#ifndef _BLOCK_INFO_H +#define _BLOCK_INFO_H 1 +#include +#include +#include "nvscistream.h" + +/* Maximum number of consumers */ +#define MAX_CONSUMERS 4 + +/* Maximum number of blocks */ +#define MAX_BLOCKS 100 + +/* Maximum number of packets supported */ +#define MAX_PACKETS 32 + +/* Maximum number of elements supported */ +#define MAX_ELEMS 8 + +/* Memory size of endpoint inforamtion */ +#define INFO_SIZE 50 + +/* NvSci modules for all objects */ +extern NvSciSyncModule sciSyncModule; +extern NvSciBufModule sciBufModule; + +/* Flag used to terminate the thread that +* was spawned to handle the late/re-attached +* consumer connections upon stream disconnect +*/ +extern atomic_int streamDone; + +/* Number of registered blocks for streaming */ +extern int32_t numBlocks; + +/* Number of active blocks */ +extern uint32_t numAlive; + +/* variables for synchronization */ +extern pthread_mutex_t mutex; +extern pthread_cond_t cond; + +/* Thread for handling late/re-attached consumer +* connections +*/ +extern pthread_t dispatchThread; + +/* Common options for all blocks */ +typedef struct { + /* Indicate whether the producer/consumer sets endpoint info */ + bool endInfo; + /* Indicate whether the producer uses yuv format */ + bool yuv; + /* Indicate whether the extern event service is used */ + bool useExtEventService; + /* Indicates the number of late consumers for late/re-attach usecase */ + uint32_t numLateConsumer; + /* Total number of consumers */ + uint32_t numConsumer; + /* Indicates c2c usecase */ + bool c2cMode; + /* Indicates consumer connection is late/reattach*/ + bool lateAttach; +} CommonOptions; + +extern CommonOptions opts; + +/* Endpoint data structure for tracking the +* IPC/C2C channels +*/ +typedef struct { + /* Holds the IPC endpoint corresponding to an IPC channel */ + NvSciIpcEndpoint ipcEndpoint; + /* Holds the C2C endpoint corresponding to an C2C channel */ + NvSciIpcEndpoint c2cEndpoint; + /* named IPC channel */ + char 
ipcChannel[32]; + /* named IPC channel used for handsking between + * producer and late/re-attached consumer connection + */ + char ipcChannelForHandshake[32]; + /* named c2c channel */ + char c2cChannel[32]; + /* IPC/C2C block created for handling the late/re-attached + * consumer connections*/ + NvSciStreamBlock ipcBlock; + /* Queue block that is needed for c2c usecase for a c2c + * consumer late/reattach connections + */ + NvSciStreamBlock queue; + /* ReturnSync block that is needed for c2c usecase for a c2c + * consumer late/reattach connections + */ + NvSciStreamBlock returnSync; + + /* Indicates the connect state of IPC channel */ + bool ipcConnected; + /* Indicates the connect state of C2C channel */ + bool c2cConnected; + /* Indicates the Open state of IPC channel */ + bool ipcOpened; + /* Indicates the Open state of C2C channel */ + bool c2cOpened; + /* QNX channel ID for communication */ + int32_t chid; + /* QNX channel connection ID */ + int32_t coid; +} Endpoint; + +extern Endpoint ipcEP[MAX_CONSUMERS]; + +typedef struct { + /* The application's name for the element */ + uint32_t userName; + /* Attribute list for element */ + NvSciBufAttrList attrList; +} ElemAttr; + +/* + * Some block types that do not require direct interaction will share a + * common private data structure and event handling functon. + */ + +/* Common block private data */ +typedef struct { + NvSciStreamBlock block; + int64_t waitTime; + char name[32]; + void (*deleteFunc)(void*); +} BlockData; + +typedef int32_t (*BlockFunc)(void* data, uint32_t wait); + +/* Structure to track block info */ +typedef struct { + NvSciStreamBlock handle; + void* data; + BlockFunc func; + NvSciEventNotifier* notifier; + bool isAlive; + bool retry; +} BlockEventData; + +extern BlockEventData blocks[MAX_BLOCKS]; +extern BlockEventData* blocksAlive[MAX_BLOCKS]; + +/* Create data structure for common blocks */ +extern BlockData* createCommon( + char const* name, + size_t size); + +/* Handle event for common block */ +extern int32_t handleCommon( + void* data, + uint32_t wait); + +/* Delete common block */ +extern void deleteCommon( + void* data); + +/* + * Functions for setting up each kind of block + */ + +extern int32_t createIpcDst( + NvSciStreamBlock* ipcDst, + const char* channel, + bool useExternalEventService); + +extern int32_t createIpcSrc( + NvSciStreamBlock* ipcSrc, + const char* channel, + bool useExternalEventService); + +extern int32_t createIpcSrc2( + NvSciStreamBlock* ipcsrc, + NvSciIpcEndpoint endpoint, + bool useExtEventService); + +extern int32_t createC2cSrc2( + NvSciStreamBlock* c2cSrc, + NvSciIpcEndpoint endpoint, + NvSciStreamBlock queue); + +extern int32_t createC2cDst( + NvSciStreamBlock* c2cDst, + const char* channel, + NvSciStreamBlock pool); + +extern int32_t createC2cSrc( + NvSciStreamBlock* c2cSrc, + const char* channel, + NvSciStreamBlock queue); + +extern int32_t createLimiter( + NvSciStreamBlock* limiter, + uint32_t limit); + +extern int32_t createPresentSync( + NvSciStreamBlock* presentSync); + +extern int32_t createReturnSync( + NvSciStreamBlock* returnSync); + +extern int32_t createMulticast( + NvSciStreamBlock* multicast, + uint32_t numConsumer); + +extern int32_t createPool_Common( + NvSciStreamBlock* pool, + uint32_t numPacket, + bool isC2cPool); + +extern int32_t createQueue( + NvSciStreamBlock* queue, + uint32_t useMailbox); + +extern int32_t (*createProducer)( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames); + +extern int32_t (*createConsumer)( + 
NvSciStreamBlock* consumer, + NvSciStreamBlock queue, + uint32_t index, + uint32_t frames); + +extern int32_t (createProducer_Usecase1)( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames); + +extern int32_t (createConsumer_Usecase1)( + NvSciStreamBlock* consumer, + NvSciStreamBlock queue, + uint32_t index, + uint32_t frames); + +extern int32_t (createProducer_Usecase2)( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames); + +extern int32_t (createConsumer_Usecase2)( + NvSciStreamBlock* consumer, + NvSciStreamBlock queue, + uint32_t index, + uint32_t frames); + +extern int32_t(createProducer_Usecase3)( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames); + +extern int32_t(createConsumer_Usecase3)( + NvSciStreamBlock* consumer, + NvSciStreamBlock queue, + uint32_t index, + uint32_t frames); + +extern int32_t createPool_Usecase3( + NvSciStreamBlock* pool, + uint32_t numPacket, + bool isC2cPool); + +extern void* handleLateConsumerThreadFunc(void*); + +#endif // _BLOCK_INFO_H diff --git a/event_sample_app/block_ipc.c b/event_sample_app/block_ipc.c new file mode 100644 index 0000000..4def6e5 --- /dev/null +++ b/event_sample_app/block_ipc.c @@ -0,0 +1,199 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - ipc blocks + */ + +#include +#include +#include "nvsciipc.h" +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* + * Ipc blocks do not require any block-specific interactions so we + * use the set of common functions to handle its events. However + * they do have an additional data field which needs to be cleaned + * up when the block is destroyed, so we use more than the common + * data structure and delete function. 
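+ *
+ * Typical usage, with a hypothetical NvSciIpc channel name taken from
+ * the platform's IPC configuration:
+ *
+ *     NvSciStreamBlock ipcSrc;
+ *     if (!createIpcSrc(&ipcSrc, "nvscistream_0", false)) {
+ *         /* handle failure */
+ *     }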
+ */ + +/* Create and register a new ipcsrc block */ +int32_t createIpcSrc( + NvSciStreamBlock* ipcsrc, + const char* channel, + bool useExtEventService) +{ + NvSciError err; + uint32_t i; + uint32_t slot = 0; + + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("IpcSrc", 0); + if (NULL == blockData) { + return 0; + } + + for (i=0; i< MAX_CONSUMERS; i++) { + if (!strcmp(ipcEP[i].ipcChannel, channel)) { + slot = i; + break; + } + } + + /* Open the named channel */ + err = NvSciIpcOpenEndpoint(channel, &ipcEP[slot].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to open channel (%s) for IpcSrc\n", + err, channel); + deleteCommon(blockData); + return 0; + } + + err = NvSciIpcResetEndpointSafe(ipcEP[slot].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to reset IPC endpoint", err); + } + + /* Create a ipcsrc block */ + err = NvSciStreamIpcSrcCreate(ipcEP[slot].ipcEndpoint, + sciSyncModule, + sciBufModule, + &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create IpcSrc block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + /* Configuate block to use the external event service for + * internal I/O messages */ + if (useExtEventService && !eventFuncs->regInt(blockData->block)) { + deleteCommon(blockData); + return 0; + } + + *ipcsrc = blockData->block; + ipcEP[slot].ipcBlock = blockData->block; + ipcEP[slot].ipcOpened = true; + ipcEP[slot].ipcConnected = true; + + return 1; +} + + +/* Create and register a new ipcsrc block */ +int32_t createIpcSrc2( + NvSciStreamBlock* ipcsrc, + NvSciIpcEndpoint endpoint, + bool useExtEventService) +{ + NvSciError err; + + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("IpcSrc", 0); + if (NULL == blockData) { + return 0; + } + + /* Create a ipcsrc block */ + err = NvSciStreamIpcSrcCreate(endpoint, + sciSyncModule, + sciBufModule, + &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create IpcSrc block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + /* Configuate block to use the external event service for + * internal I/O messages */ + if (useExtEventService && !eventFuncs->regInt(blockData->block)) { + deleteCommon(blockData); + return 0; + } + + *ipcsrc = blockData->block; + + return 1; +} + +/* Create and register a new ipcdst block */ +int32_t createIpcDst( + NvSciStreamBlock* ipcdst, + const char* channel, + bool useExtEventService) +{ + NvSciError err; + + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("IpcDst", 0); + if (NULL == blockData) { + return 0; + } + + /* Open the named channel */ + err = NvSciIpcOpenEndpoint(channel, &ipcEP[0].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to open channel (%s) for IpcDst\n", + err, channel); + deleteCommon(blockData); + return 0; + } + + err = NvSciIpcResetEndpointSafe(ipcEP[0].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to reset IPC endpoint", err); + } + + /* Create a ipcdst block */ + err = NvSciStreamIpcDstCreate(ipcEP[0].ipcEndpoint, + sciSyncModule, + sciBufModule, + 
&blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create IpcDst block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + /* Configuate block to use the external event service for + * internal I/O messages */ + if (useExtEventService && !eventFuncs->regInt(blockData->block)) { + deleteCommon(blockData); + return 0; + } + + *ipcdst = blockData->block; + return 1; +} diff --git a/event_sample_app/block_limiter.c b/event_sample_app/block_limiter.c new file mode 100644 index 0000000..63d0c5f --- /dev/null +++ b/event_sample_app/block_limiter.c @@ -0,0 +1,56 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - limiter block + */ + +#include +#include +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* + * Limiter blocks do not require any block-specific interactions so we + * use the set of common functions to handle its events. + */ + +/* Create and register a new limiter block */ +int32_t createLimiter( + NvSciStreamBlock* limiter, + uint32_t limit) +{ + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("Limiter", 0); + if (NULL == blockData) { + return 0; + } + + /* Create a limiter block */ + NvSciError err = + NvSciStreamLimiterCreate(limit, &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create limiter block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + *limiter = blockData->block; + return 1; +} diff --git a/event_sample_app/block_multicast.c b/event_sample_app/block_multicast.c new file mode 100644 index 0000000..7b59596 --- /dev/null +++ b/event_sample_app/block_multicast.c @@ -0,0 +1,57 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - multicast block + */ + +#include +#include +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* + * Multicast blocks do not require any block-specific interactions so we + * use the set of common functions to handle its events. 
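+ *
+ * Usage sketch (block handles hypothetical): one producer branch fans
+ * out to two consumer branches:
+ *
+ *     NvSciStreamBlock multicast;
+ *     if (createMulticast(&multicast, 2U)) {
+ *         NvSciStreamBlockConnect(producer, multicast);
+ *         NvSciStreamBlockConnect(multicast, queue0);
+ *         NvSciStreamBlockConnect(multicast, queue1);
+ *     }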
+ */ + +/* Create and register a new multicast block */ +int32_t createMulticast( + NvSciStreamBlock* multicast, + uint32_t numConsumer) +{ + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("Multicast", 0); + if (NULL == blockData) { + return 0; + } + + /* Create a multicast block */ + NvSciError err = + NvSciStreamMulticastCreate(numConsumer, &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create limiter block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + *multicast = blockData->block; + return 1; +} + diff --git a/event_sample_app/block_pool.c b/event_sample_app/block_pool.c new file mode 100644 index 0000000..dd680bb --- /dev/null +++ b/event_sample_app/block_pool.c @@ -0,0 +1,743 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - pool block + */ + +#include +#include +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* Internal data used by the pool block */ +typedef struct { + BlockData common; + bool isC2cPool; + uint32_t numConsumers; + uint32_t numProdElem; + uint32_t numConsElem; + bool elementsDone; + ElemAttr prodElem[MAX_ELEMS]; + ElemAttr consElem[MAX_ELEMS]; + uint32_t numPacket; + uint32_t numPacketReady; + bool packetsDone; + NvSciStreamPacket packet[MAX_PACKETS]; +} PoolData; + +/* Free up pool block resources */ +static void deletePool( + PoolData* poolData) +{ + /* Destroy block */ + if (poolData->common.block != 0) { + (void)NvSciStreamBlockDelete(poolData->common.block); + } + + /* Free data */ + free(poolData); +} + +/* Handle query of basic stream info */ +static int32_t handleStreamInit( + PoolData* poolData) +{ + /* Query number of consumers */ + NvSciError err = + NvSciStreamBlockConsumerCountGet(poolData->common.block, + &poolData->numConsumers); + if (NvSciError_Success != err) { + printf("Pool failed (%x) to query number of consumers", err); + return 0; + } + + /* Query producer and consumer(s) endpoint info if needed */ + + return 1; +} + +/* + * If ready, handle setup of the pool buffers. + * + * Most of the work the pool application has to do resides in this function. 
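+ *
+ * The sequence is: import element counts and attributes from both
+ * endpoints, match producer elements to consumer elements by name,
+ * reconcile each matched pair with NvSciBufAttrListReconcile(), export
+ * the chosen elements, then allocate each packet's buffers with
+ * NvSciBufObjAlloc() and send them downstream.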
+ */
+static int32_t handlePoolBufferSetup(
+    PoolData* poolData)
+{
+    NvSciError err;
+
+    /* Query producer element count */
+    err = NvSciStreamBlockElementCountGet(poolData->common.block,
+                                          NvSciStreamBlockType_Producer,
+                                          &poolData->numProdElem);
+    if (NvSciError_Success != err) {
+        printf("Pool failed (%x) to query producer element count\n", err);
+        return 0;
+    }
+
+    /* Query consumer element count */
+    err = NvSciStreamBlockElementCountGet(poolData->common.block,
+                                          NvSciStreamBlockType_Consumer,
+                                          &poolData->numConsElem);
+    if (NvSciError_Success != err) {
+        printf("Pool failed (%x) to query consumer element count\n", err);
+        return 0;
+    }
+
+    /* Query all producer elements */
+    for (uint32_t i=0U; i<poolData->numProdElem; ++i) {
+        err = NvSciStreamBlockElementAttrGet(poolData->common.block,
+                                             NvSciStreamBlockType_Producer, i,
+                                             &poolData->prodElem[i].userName,
+                                             &poolData->prodElem[i].attrList);
+        if (NvSciError_Success != err) {
+            printf("Pool failed (%x) to query producer element %d\n", err, i);
+            return 0;
+        }
+    }
+
+    /* Query all consumer elements */
+    for (uint32_t i=0U; i<poolData->numConsElem; ++i) {
+        err = NvSciStreamBlockElementAttrGet(poolData->common.block,
+                                             NvSciStreamBlockType_Consumer, i,
+                                             &poolData->consElem[i].userName,
+                                             &poolData->consElem[i].attrList);
+        if (NvSciError_Success != err) {
+            printf("Pool failed (%x) to query consumer element %d\n", err, i);
+            return 0;
+        }
+    }
+
+    /* Indicate that all element information has been imported */
+    poolData->elementsDone = true;
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_ElementImport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("Pool failed (%x) to complete element import\n", err);
+        return 0;
+    }
+
+    /*
+     * Go through requested elements from producer and consumer and line
+     * them up. A general streaming application might not have a
+     * one-to-one correspondence, and the pool may have to decide what
+     * subset of elements to select based on knowledge of the data types
+     * that the application suite supports. This sample application is
+     * much simpler, but we still go through the process rather than
+     * assuming producer and consumer have requested the same things in
+     * the same order.
+     */
+    uint32_t numElem = 0, p, c, e, i;
+    ElemAttr elem[MAX_ELEMS];
+    for (p=0; p<poolData->numProdElem; ++p) {
+        ElemAttr* prodElem = &poolData->prodElem[p];
+        for (c=0; c<poolData->numConsElem; ++c) {
+            ElemAttr* consElem = &poolData->consElem[c];
+
+            /* If requested element types match, combine the entries */
+            if (prodElem->userName == consElem->userName) {
+                ElemAttr* poolElem = &elem[numElem++];
+                poolElem->userName = prodElem->userName;
+                poolElem->attrList = NULL;
+
+                /* Combine and reconcile the attribute lists */
+                NvSciBufAttrList oldAttrList[2] = { prodElem->attrList,
+                                                    consElem->attrList };
+                NvSciBufAttrList conflicts = NULL;
+                err = NvSciBufAttrListReconcile(oldAttrList, 2,
+                                                &poolElem->attrList,
+                                                &conflicts);
+
+                /* Discard any conflict list.
+                 * (Could report its contents for additional debug info)
+                 */
+                if (NULL != conflicts) {
+                    NvSciBufAttrListFree(conflicts);
+                }
+
+                /* Abort on error */
+                if (NvSciError_Success != err) {
+                    printf("Failed to reconcile element %x attrs (%x)\n",
+                           poolElem->userName, err);
+                    return 0;
+                }
+
+                /* Found a match for this producer element so move on */
+                break;
+            } /* if match */
+        } /* for all requested consumer elements */
+    } /* for all requested producer elements */
+
+    /* Should be at least one element */
+    if (0 == numElem) {
+        printf("Pool didn't find any common elements\n");
+        return 0;
+    }
+
+    /* The requested attribute lists are no longer needed, so discard them */
+    for (p=0; p<poolData->numProdElem; ++p) {
+        ElemAttr* prodElem = &poolData->prodElem[p];
+        if (NULL != prodElem->attrList) {
+            NvSciBufAttrListFree(prodElem->attrList);
+            prodElem->attrList = NULL;
+        }
+    }
+    for (c=0; c<poolData->numConsElem; ++c) {
+        ElemAttr* consElem = &poolData->consElem[c];
+        if (NULL != consElem->attrList) {
+            NvSciBufAttrListFree(consElem->attrList);
+            consElem->attrList = NULL;
+        }
+    }
+
+    /* Inform the stream of the chosen elements */
+    for (e=0; e<numElem; ++e) {
+        ElemAttr* poolElem = &elem[e];
+        err = NvSciStreamBlockElementAttrSet(poolData->common.block,
+                                             poolElem->userName,
+                                             poolElem->attrList);
+        if (NvSciError_Success != err) {
+            printf("Pool failed (%x) to send element %d info\n", err, e);
+            return 0;
+        }
+    }
+
+    /* Indicate that all element information has been exported */
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_ElementExport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("Pool failed (%x) to complete element export\n", err);
+        return 0;
+    }
+
+    /*
+     * Create and send all the packets and their buffers
+     * Note: Packets and buffers are not guaranteed to be received by
+     *       producer and consumer in the same order sent, nor are the
+     *       status messages sent back guaranteed to preserve ordering.
+     *       This is one reason why an event driven model is more robust.
+     */
+    for (i=0; i<poolData->numPacket; ++i) {
+
+        /*
+         * Create a new packet
+         * Our pool implementation doesn't need to save any packet-specific
+         * data, but we do need to provide unique cookies, so we just
+         * use the pointer to the location we save the handle. For other
+         * blocks, this will be a pointer to the structure where the
+         * packet information is kept.
+         */
+        NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
+        err = NvSciStreamPoolPacketCreate(poolData->common.block,
+                                          cookie,
+                                          &poolData->packet[i]);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to create packet %d\n", err, i);
+            return 0;
+        }
+
+        /* Create buffers for the packet */
+        for (e=0; e<numElem; ++e) {
+            /* Allocate a buffer object for the element */
+            NvSciBufObj obj;
+            err = NvSciBufObjAlloc(elem[e].attrList, &obj);
+            if (NvSciError_Success != err) {
+                printf("Failed (%x) to allocate buffer %d of packet %d\n",
+                       err, e, i);
+                return 0;
+            }
+
+            /* Insert the buffer in the packet */
+            err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
+                                                    poolData->packet[i],
+                                                    e, obj);
+            if (NvSciError_Success != err) {
+                printf("Failed (%x) to insert buffer %d of packet %d\n",
+                       err, e, i);
+                return 0;
+            }
+
+            /* The pool doesn't need to keep a copy of the object handle */
+            NvSciBufObjFree(obj);
+        }
+
+        /* Indicate packet setup is complete */
+        err = NvSciStreamPoolPacketComplete(poolData->common.block,
+                                            poolData->packet[i]);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to complete packet %d setup\n",
+                   err, i);
+            return 0;
+        }
+    }
+
+    /*
+     * Indicate that all packets have been sent.
+     * Note: An application could choose to wait to send this until
+     *       the status has been received, in order to try to make any
+     *       corrections for rejected packets.
+     */
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_PacketExport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to complete packet export\n",
+               err);
+        return 0;
+    }
+
+    /* Once all packets are set up, no longer need to keep the attributes */
+    for (e=0; e<numElem; ++e) {
+        ElemAttr* poolElem = &elem[e];
+        if (NULL != poolElem->attrList) {
+            NvSciBufAttrListFree(poolElem->attrList);
+            poolElem->attrList = NULL;
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * If ready, handle setup of the C2C pool buffers.
+ *
+ * Most of the work the pool application has to do resides in this function.
+ */
+static int32_t handleC2cPoolBufferSetup(
+    PoolData* poolData)
+{
+    NvSciError err;
+
+    /* Query allocated element count from the primary pool */
+    uint32_t numElem;
+    err = NvSciStreamBlockElementCountGet(poolData->common.block,
+                                          NvSciStreamBlockType_Producer,
+                                          &numElem);
+    if (NvSciError_Success != err) {
+        printf("C2C pool failed (%x) to query allocated element count\n",
+               err);
+        return 0;
+    }
+
+    /* Query all allocated elements from the primary pool */
+    ElemAttr elem[MAX_ELEMS];
+    for (uint32_t i = 0U; i < numElem; ++i) {
+        err = NvSciStreamBlockElementAttrGet(poolData->common.block,
+                                             NvSciStreamBlockType_Producer, i,
+                                             &elem[i].userName,
+                                             &elem[i].attrList);
+        if (NvSciError_Success != err) {
+            printf("C2C pool failed (%x) to query allocated element %d\n",
+                   err, i);
+            return 0;
+        }
+    }
+
+    /* If necessary, query the consumer elements for validation */
+
+    /* Indicate that all element information has been imported */
+    poolData->elementsDone = true;
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_ElementImport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("C2C pool failed (%x) to complete element import\n", err);
+        return 0;
+    }
+
+    /*
+     * Create and send all the packets and their buffers
+     */
+    for (uint32_t i = 0; i < poolData->numPacket; ++i) {
+
+        /*
+         * Create a new packet
+         * Our pool implementation doesn't need to save any packet-specific
+         * data, but we do need to provide unique cookies, so we just
+         * use the pointer to the location we save the handle. For other
+         * blocks, this will be a pointer to the structure where the
+         * packet information is kept.
+         */
+        NvSciStreamCookie cookie = (NvSciStreamCookie)&poolData->packet[i];
+        err = NvSciStreamPoolPacketCreate(poolData->common.block,
+                                          cookie,
+                                          &poolData->packet[i]);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to create packet %d\n", err, i);
+            return 0;
+        }
+
+        /* Create buffers for the packet */
+        for (uint32_t e = 0; e < numElem; ++e) {
+            /* Allocate a buffer object for the element */
+            NvSciBufObj obj;
+            err = NvSciBufObjAlloc(elem[e].attrList, &obj);
+            if (NvSciError_Success != err) {
+                printf("Failed (%x) to allocate buffer %d of packet %d\n",
+                       err, e, i);
+                return 0;
+            }
+
+            /* Insert the buffer in the packet */
+            err = NvSciStreamPoolPacketInsertBuffer(poolData->common.block,
+                                                    poolData->packet[i],
+                                                    e, obj);
+            if (NvSciError_Success != err) {
+                printf("Failed (%x) to insert buffer %d of packet %d\n",
+                       err, e, i);
+                return 0;
+            }
+
+            /* The pool doesn't need to keep a copy of the object handle */
+            NvSciBufObjFree(obj);
+        }
+
+        /* Indicate packet setup is complete */
+        err = NvSciStreamPoolPacketComplete(poolData->common.block,
+                                            poolData->packet[i]);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to complete packet %d setup\n",
+                   err, i);
+            return 0;
+        }
+    }
+
+    /*
+     * Indicate that all packets have been sent.
+     * Note: An application could choose to wait to send this until
+     *       the status has been received, in order to try to make any
+     *       corrections for rejected packets.
+     */
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_PacketExport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to complete packet export\n",
+               err);
+        return 0;
+    }
+
+    /* Once all packets are set up, no longer need to keep the attributes */
+    for (uint32_t e = 0; e < numElem; ++e) {
+        ElemAttr* poolElem = &elem[e];
+        if (NULL != poolElem->attrList) {
+            NvSciBufAttrListFree(poolElem->attrList);
+            poolElem->attrList = NULL;
+        }
+    }
+
+    return 1;
+}
+
+/* Check packet status */
+static int32_t handlePacketsStatus(
+    PoolData* poolData)
+{
+    bool packetFailure = false;
+    NvSciError err;
+
+    /* Check each packet */
+    for (uint32_t p = 0; p < poolData->numPacket; ++p) {
+        /* Check packet acceptance */
+        bool accept;
+        err = NvSciStreamPoolPacketStatusAcceptGet(poolData->common.block,
+                                                   poolData->packet[p],
+                                                   &accept);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to retrieve packet %d's acceptance status\n",
+                   err, p);
+            return 0;
+        }
+        if (accept) {
+            continue;
+        }
+
+        /* On rejection, query and report details */
+        packetFailure = true;
+        NvSciError status;
+
+        /* Check packet status from producer */
+        err = NvSciStreamPoolPacketStatusValueGet(
+                poolData->common.block,
+                poolData->packet[p],
+                NvSciStreamBlockType_Producer, 0U,
+                &status);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to retrieve packet %d's status from producer\n",
+                   err, p);
+            return 0;
+        }
+        if (status != NvSciError_Success) {
+            printf("Producer rejected packet %d with error %x\n", p, status);
+        }
+
+        /* Check packet status from consumers */
+        for (uint32_t c = 0; c < poolData->numConsumers; ++c) {
+            err = NvSciStreamPoolPacketStatusValueGet(
+                    poolData->common.block,
+                    poolData->packet[p],
+                    NvSciStreamBlockType_Consumer, c,
+                    &status);
+            if (NvSciError_Success != err) {
+                printf("Failed (%x) to retrieve packet %d's status from consumer %d\n",
+                       err, p, c);
+                return 0;
+            }
+            if (status != NvSciError_Success) {
+                printf("Consumer %d rejected packet %d with error %x\n",
+                       c, p, status);
+            }
+        }
+    }
+
+    /* Indicate that status for all packets has been received. */
+    poolData->packetsDone = true;
+    err = NvSciStreamBlockSetupStatusSet(poolData->common.block,
+                                         NvSciStreamSetup_PacketImport,
+                                         true);
+    if (NvSciError_Success != err) {
+        printf("Pool failed (%x) to complete packet import\n", err);
+        return 0;
+    }
+
+    return packetFailure ? 0 : 1;
+}
+
+/* Handle events on a pool block
+ *
+ * The pool block coordinates allocation of packets based on producer
+ * and consumer requirements during setup. After that, no further
+ * events should be received until the stream is torn down.
+ */
+static int32_t handlePool(
+    void* data,
+    uint32_t wait)
+{
+    /* Cast to pool data */
+    PoolData* poolData = (PoolData*)data;
+
+    /* Get time to wait */
+    int64_t waitTime = wait ? poolData->common.waitTime : 0;
+
+    /* Query/wait for an event on the block */
+    NvSciStreamEventType event;
+    NvSciError err;
+    err = NvSciStreamBlockEventQuery(poolData->common.block, waitTime, &event);
+
+    /* Handle errors */
+    if (NvSciError_Success != err) {
+
+        /* If not asked to wait, a timeout is not an error */
+        if (!waitTime && (NvSciError_Timeout == err)) {
+            return 0;
+        }
+
+        /* Otherwise, any error is considered fatal. A timeout probably
+         * indicates a failure to connect and complete setup in a timely
+         * fashion, so we specifically call out this case.
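+         * (Block wait times in this app are expressed in microseconds:
+         * the initial connection allows 60 * 1000000 us = 60 s, and
+         * later setup events 10 * 1000000 us = 10 s.)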
+ */ + if (NvSciError_Timeout == err) { + printf("Pool timed out waiting for setup instructions\n"); + } else { + printf("Pool event query failed with error %x\n", err); + } + deletePool(poolData); + return -1; + } + + /* If we received an event, handle it based on its type + * + * Note that there's a lot of error checking we could choose to do for + * some of these events, like making sure that we only receive each + * event once for a given entry. But NvSciStream is expected to take + * care of all of that, even when the application makes a mistake. + * So we only check for things that don't trigger NvSciStream errors. + */ + int32_t rv = 1; + NvSciError status; + switch (event) { + /* + * Any event we don't explicitly handle is a fatal error + */ + default: + printf("Pool received unknown event %x\n", event); + + rv = -1; + break; + + /* + * Error events should never occur with safety-certified drivers, + * and are provided only in non-safety builds for debugging + * purposes. Even then, they should only occur when something + * fundamental goes wrong, like the system running out of memory, + * or stack/heap corruption, or a bug in NvSci which should be + * reported to NVIDIA. + */ + case NvSciStreamEventType_Error: + err = NvSciStreamBlockErrorGet(poolData->common.block, &status); + if (NvSciError_Success != err) { + printf("%s Failed to query the error event code %x\n", + poolData->common.name, err); + rv = -1; + } else { + if ((opts.numLateConsumer > 0U) && + (status == NvSciError_StreamNotConnected)) { + printf("[WARN] %s received error event: %x\n", + poolData->common.name, status); + rv = 2; + } else { + printf("%s received error event: %x\n", + poolData->common.name, status); + rv = -1; + } + } + + break; + + /* + * If told to disconnect, it means either the stream finished its + * business or some other block had a failure. We'll just do a + * clean up and return without an error. But if it happened before + * all the pool setup operations finished, we'll report it for + * debugging purposes. + */ + case NvSciStreamEventType_Disconnected: + if (!poolData->elementsDone) { + printf("Warning: Pool disconnect before element support\n"); + } else if (!poolData->packetsDone) { + printf("Warning: Pool disconnect before packet setup\n"); + } + rv = 2; + break; + + /* + * The pool doesn't have to do anything immediately on connection, but + * now that the stream is complete we can reduce the timeout to wait + * for the producer and consumer events to arrive. + */ + case NvSciStreamEventType_Connected: + /* Initialize basic stream info */ + if (!handleStreamInit(poolData)) { + rv = -1; + } + + poolData->common.waitTime = 10 * 1000000; + break; + + /* Process all element support from producer and consumer(s) */ + case NvSciStreamEventType_Elements: + if (poolData->isC2cPool) { + if (!handleC2cPoolBufferSetup(poolData)) { + rv = -1; + } + } else { + if (!handlePoolBufferSetup(poolData)) { + rv = -1; + } + } + break; + + /* + * Check packet/buffer status returned from producer/consumer + * A more sophisticated application might have the means to recover + * from any failures. But in general we expect that in a production + * application, any failures are due to something fundamental going + * wrong like lack of memory/resources, which hopefully has been + * designed out. So these status checks are more useful during + * development, where we just report the issue for debugging purposes. 
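+     * (A development-time recovery sketch: an application that delayed
+     * its PacketExport status until packet status arrived, as the note
+     * above suggests, could withdraw a rejected packet with
+     *
+     *     NvSciStreamPoolPacketDelete(poolData->common.block,
+     *                                 poolData->packet[p]);
+     *
+     * and then create and send a replacement. This app treats any
+     * rejection as fatal instead.)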
+ * + * Once all the status events have been received for all packets + * and buffers, the pool should require no further interaction + * until the time comes to shut down the application. We set the + * wait time to infinite. + */ + case NvSciStreamEventType_PacketStatus: + /* There are multiple ways the status handling could be organized. + * In particular, waiting for status could be interleaved with + * sending the packets. This example waits for status from all + * packets before checking each packet's status. + */ + if (++poolData->numPacketReady < poolData->numPacket) { + break; + } + + if (!handlePacketsStatus(poolData)) { + rv = -1; + } + + poolData->common.waitTime = -1; + break; + + /* All setup complete. Transition to runtime phase */ + case NvSciStreamEventType_SetupComplete: + break; + } + + /* On failure or final event, clean up the block */ + if ((rv < 0) || (1 < rv)) { + deletePool(poolData); + } + + return rv; +} + + +/* Create and register a new pool block */ +int32_t createPool_Common( + NvSciStreamBlock* pool, + uint32_t numPacket, + bool isC2cPool) +{ + /* Create a data structure to track the block's status */ + PoolData* poolData = (PoolData*)calloc(1, sizeof(PoolData)); + if (NULL == poolData) { + printf("Failed to allocate data structure for pool\n"); + return 0; + } + + /* Save the name for debugging purposes */ + strcpy(poolData->common.name, "Pool"); + + /* Save the c2c pool flag */ + poolData->isC2cPool = isC2cPool; + + /* Save the packet count */ + poolData->numPacket = numPacket; + + /* Wait time for initial connection event will be 60 seconds */ + poolData->common.waitTime = 60 * 1000000; + + /* Create a pool block */ + NvSciError err = + NvSciStreamStaticPoolCreate(poolData->numPacket, + &poolData->common.block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create pool block\n", err); + deletePool(poolData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(poolData->common.block, poolData, handlePool)) { + deletePool(poolData); + return 0; + } + + *pool = poolData->common.block; + return 1; +} diff --git a/event_sample_app/block_presentsync.c b/event_sample_app/block_presentsync.c new file mode 100644 index 0000000..1e4c5b7 --- /dev/null +++ b/event_sample_app/block_presentsync.c @@ -0,0 +1,55 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - PresentSync block + */ + +#include +#include +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* + * PresentSync blocks do not require any block-specific interactions so we + * use the set of common functions to handle its events. 
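+ *
+ * Usage sketch (block handles hypothetical): placed downstream of the
+ * producer so payloads are forwarded once their prefences are signaled:
+ *
+ *     NvSciStreamBlock presentSync;
+ *     if (createPresentSync(&presentSync)) {
+ *         NvSciStreamBlockConnect(producer, presentSync);
+ *         NvSciStreamBlockConnect(presentSync, ipcSrc);
+ *     }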
+ */ + +/* Create and register a new presentSync block */ +int32_t createPresentSync( + NvSciStreamBlock* presentSync) +{ + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("PresentSync", 0); + if (NULL == blockData) { + return 0; + } + + /* Create a PresentSync block */ + NvSciError err = + NvSciStreamPresentSyncCreate(sciSyncModule, &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create PresentSync block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + *presentSync = blockData->block; + return 1; +} diff --git a/event_sample_app/block_producer_uc1.c b/event_sample_app/block_producer_uc1.c new file mode 100644 index 0000000..62ccb7b --- /dev/null +++ b/event_sample_app/block_producer_uc1.c @@ -0,0 +1,1339 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - producer block for use case 1 + * + * This implements the producer for use case 1: cuda to cuda streaming + */ + +#include +#include +#include +#include "nvscibuf.h" +#include "nvscisync.h" +#include "nvscistream.h" +#include "cuda.h" +#include "cuda_runtime_api.h" +#include "block_info.h" +#include "event_loop.h" +#include "usecase1.h" +#include "util.h" + +/* Internal data structure used to track packets */ +typedef struct { + /* The packet handle use for NvSciStream functions */ + NvSciStreamPacket handle; + /* NvSci buffer object for the packet's data buffer */ + NvSciBufObj dataObj; + /* CUDA external memory handle for the data buffer */ + cudaExternalMemory_t dataExtMem; + /* CUDA device memory pointer for the data buffer */ + void* dataDevMem; + /* Local system memory buffer used as the source for CUDA operations */ + uint8_t* dataSrcMem; + /* NvSci buffer object for the packet's CRC buffer */ + NvSciBufObj crcObj; + /* Virtual address for the CRC buffer */ + uint8_t* crcPtr; + /* Fence for the latest payload using this packet */ + NvSciSyncFence fence; +} ProdPacket; + +/* Internal data used by the producer block */ +typedef struct { + /* Common block info */ + BlockData common; + + /* Number of consumers */ + uint32_t numConsumers; + + /* CUDA device ID and UUID */ + int32_t cudaDeviceId; + CUuuid cudaUuid; + + /* CUDA producer stream */ + cudaStream_t cudaStream; + + /* NvSciSync context to do CPU waiting for fences */ + NvSciSyncCpuWaitContext cpuWaitContext; + /* Sync attributes for CPU waiting */ + NvSciSyncAttrList cpuWaitAttr; + + /* CUDA sync attributes required for signaling */ + NvSciSyncAttrList signalAttr; + /* CUDA sync attributes required for waiting */ + NvSciSyncAttrList waiterAttr; + /* Sync object for CUDA to signal after generating data */ + NvSciSyncObj signalObj; + /* CUDA semaphore mapped to sync object */ + cudaExternalSemaphore_t signalSem; + /* Sync objects to wait for before 
generating data */ + NvSciSyncObj waiterObj[MAX_CONSUMERS]; + /* CUDA semaphores mapped to sync objects */ + cudaExternalSemaphore_t waiterSem[MAX_CONSUMERS]; + + /* Element index chosen by pool for the CRC buffer */ + uint32_t crcIndex; + /* Element index chosen by pool for the data buffer */ + uint32_t dataIndex; + /* Size for data buffer after reconciling all requirements */ + uint64_t dataSize; + /* Number of packets provided by pool */ + uint32_t numPacket; + /* Information about each packet */ + ProdPacket packets[MAX_PACKETS]; + + /* Number of payloads generated so far */ + uint32_t counter; + /* Index of dataSrcMem array */ + uint32_t idxSrcMem; + /* Flag indicating producer has finished generating all payloads */ + uint32_t finished; + /* Number of frames that producer is configured to send */ + uint32_t framesDesired; +} ProdData; + +/* Free up the packet resources */ +static void deletePacket( + ProdPacket* packet) +{ + if (packet != NULL) { + if (packet->handle != NvSciStreamPacket_Invalid) { + /* Free CUDA memory mapping */ + (void)cudaFree(packet->dataDevMem); + if (packet->dataExtMem) { + (void)cudaDestroyExternalMemory(packet->dataExtMem); + packet->dataExtMem = 0; + } + + if (packet->dataSrcMem) { + free(packet->dataSrcMem); + packet->dataSrcMem = NULL; + } + + /* Free buffer objects */ + if (packet->dataObj) { + NvSciBufObjFree(packet->dataObj); + packet->dataObj = NULL; + } + + if (packet->crcObj) { + NvSciBufObjFree(packet->crcObj); + packet->crcObj = NULL; + } + + /* Clear the fences */ + NvSciSyncFenceClear(&packet->fence); + } + + /* Clear out packet information */ + memset(packet, 0, sizeof(ProdPacket)); + } +} + +/* Free up producer block resources */ +static void deleteProducer( + ProdData* prodData) +{ + /* Destroy block */ + if (prodData->common.block != 0) { + (void)NvSciStreamBlockDelete(prodData->common.block); + prodData->common.block = 0; + } + + /* Free the packet resources */ + for (uint32_t i=0;inumPacket; i++) { + deletePacket(&prodData->packets[i]); + } + + /* Free the sync objects */ + for (uint32_t i=0; i< prodData->numConsumers; i++) { + if (prodData->waiterObj[i] != NULL) { + (void)cudaDestroyExternalSemaphore(prodData->waiterSem[i]); + prodData->waiterSem[i] = 0; + NvSciSyncObjFree(prodData->waiterObj[i]); + prodData->waiterObj[i] = NULL; + } + } + + if (prodData->signalObj != NULL) { + (void)cudaDestroyExternalSemaphore(prodData->signalSem); + prodData->signalSem = 0; + NvSciSyncObjFree(prodData->signalObj); + prodData->signalObj = NULL; + } + + /* Free the cpu waiters */ + if (prodData->cpuWaitAttr != NULL) { + NvSciSyncAttrListFree(prodData->cpuWaitAttr); + prodData->cpuWaitAttr = NULL; + } + + /* Free the CPU wait contetxt */ + if (prodData->cpuWaitContext != NULL) { + NvSciSyncCpuWaitContextFree(prodData->cpuWaitContext); + prodData->cpuWaitContext = NULL; + } + + /* Destroy CUDA stream */ + (void)cudaStreamDestroy(prodData->cudaStream); + + /* Free data */ + free(prodData); +} + +/* Handle query of basic stream info */ +static int32_t handleStreamInit( + ProdData* prodData) +{ + /* Query number of consumers */ + NvSciError err = + NvSciStreamBlockConsumerCountGet(prodData->common.block, + &prodData->numConsumers); + + if (NvSciError_Success != err) { + printf("Failed (%x) to query the number of consumers\n", err); + return 0; + } + + if (opts.endInfo) { + /* Query endpoint info from all consumers */ + for (uint32_t i = 0U; i < prodData->numConsumers; i++) { + uint32_t size = INFO_SIZE; + char info[INFO_SIZE] = {0}; + err = 
NvSciStreamBlockUserInfoGet( + prodData->common.block, + NvSciStreamBlockType_Consumer, i, + ENDINFO_NAME_PROC, + &size, &info); + if (NvSciError_Success == err) { + printf("Consumer %i info: %s\n", i, info); + } else if (NvSciError_StreamInfoNotProvided == err) { + printf("Info not provided by the consumer %d\n", i); + } else { + printf("Failed (%x) to query the consumer %d info\n", err, i); + return 0; + } + } + } + + return 1; +} + +/* Handle initialization of CUDA resources for producer */ +static int32_t handleProducerInit( + ProdData* prodData) +{ + int32_t cudaRtErr; + CUresult cudaErr; + + /* Get stack limit */ + size_t unused; + cudaRtErr = cudaDeviceGetLimit(&unused, cudaLimitStackSize); + if (cudaSuccess != cudaRtErr) { + printf("Failed (%d) to get CUDA device limit\n", cudaRtErr); + return 0; + } + + /* Set CUDA device */ + prodData->cudaDeviceId = 0; + cudaRtErr = cudaSetDevice(prodData->cudaDeviceId); + if (cudaSuccess != cudaRtErr) { + printf("Failed (%d) to set CUDA device\n", cudaRtErr); + return 0; + } + + /* Get UUID for CUDA device */ + cudaErr = cuDeviceGetUuid(&prodData->cudaUuid, prodData->cudaDeviceId); + if (CUDA_SUCCESS != cudaErr) { + printf("Failed (%d) to get CUDA UUID\n", cudaErr); + return 0; + } + + /* Get CUDA stream for asynchronous operation */ + cudaRtErr = cudaStreamCreateWithFlags(&prodData->cudaStream, + cudaStreamNonBlocking); + if (cudaSuccess != cudaRtErr) { + printf("Failed (%d) to create CUDA stream\n", cudaRtErr); + return 0; + } + + return 1; +} + +/* Handle setup of supported buffer attributes */ +static int32_t handleProducerElemSupport( + ProdData* prodData) +{ + /* + * Note: To illustrate that NvSciStream producer and consumer do + * not need to specify the same set of element types, or use + * the same order for element types, the producer for this + * use case sends the CRC attributes first, followed by the + * primary data, while the consumer uses the opposite order. + * Our pool implementation will end up using the producer + * ordering, but that is not required either. + */ + + NvSciError sciErr; + uint32_t bufName[2]; + NvSciBufAttrList bufAttrs[2]; + + /* + * CRC buffer requires write access by CPU, and uses a raw 64 byte + * data buffer with 1 byte alignment. + */ + NvSciBufAttrValAccessPerm crcPerm = NvSciBufAccessPerm_ReadWrite; + uint8_t crcCpu = 1U; + NvSciBufType crcBufType = NvSciBufType_RawBuffer; + uint64_t crcSize = 64U; + uint64_t crcAlign = 1U; + NvSciBufAttrKeyValuePair crcKeyVals[] = { + { NvSciBufGeneralAttrKey_Types, &crcBufType, sizeof(crcBufType) }, + { NvSciBufRawBufferAttrKey_Size, &crcSize, sizeof(crcSize) }, + { NvSciBufRawBufferAttrKey_Align, &crcAlign, sizeof(crcAlign) }, + { NvSciBufGeneralAttrKey_RequiredPerm, &crcPerm, sizeof(crcPerm) }, + { NvSciBufGeneralAttrKey_NeedCpuAccess, &crcCpu, sizeof(crcCpu) } + }; + + /* Create and fill attribute list for CRC checksum buffer */ + bufName[0] = ELEMENT_NAME_CRC; + sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[0]); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to create CRC attribute list\n", sciErr); + return 0; + } + sciErr = NvSciBufAttrListSetAttrs(bufAttrs[0], + crcKeyVals, + sizeof(crcKeyVals) / + sizeof(NvSciBufAttrKeyValuePair)); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to fill CRC attribute list\n", sciErr); + return 0; + } + + /* + * Data buffer requires write access by CPU and the GPU of the cuda + * device, and uses a raw 128KB data buffer with 4KB alignment. 
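+     * (If endpoints request different sizes or alignments for this raw
+     * buffer, NvSciBuf reconciliation is expected to settle on values
+     * that satisfy all of them; the reconciled size is queried back
+     * from the pool's chosen attributes in handleProducerElemSetting.)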
+ */ + NvSciBufAttrValAccessPerm dataPerm = NvSciBufAccessPerm_ReadWrite; + uint8_t dataCpu = 1U; + NvSciRmGpuId dataGpu = { 0 }; + NvSciBufType dataBufType = NvSciBufType_RawBuffer; + uint64_t dataSize = 128U * 1024U; + uint64_t dataAlign = 4U * 1024U; + memcpy(&dataGpu.bytes, &prodData->cudaUuid.bytes, sizeof(dataGpu.bytes)); + NvSciBufAttrKeyValuePair dataKeyVals[] = { + { NvSciBufGeneralAttrKey_GpuId, &dataGpu, sizeof(dataGpu) }, + { NvSciBufGeneralAttrKey_Types, &dataBufType, sizeof(dataBufType) }, + { NvSciBufRawBufferAttrKey_Size, &dataSize, sizeof(dataSize) }, + { NvSciBufRawBufferAttrKey_Align, &dataAlign, sizeof(dataAlign) }, + { NvSciBufGeneralAttrKey_RequiredPerm, &dataPerm, sizeof(dataPerm) }, + { NvSciBufGeneralAttrKey_NeedCpuAccess, &dataCpu, sizeof(dataCpu) } + }; + + /* Create and fill attribute list for data buffer */ + bufName[1] = ELEMENT_NAME_DATA; + sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[1]); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to create data attribute list\n", sciErr); + return 0; + } + sciErr = NvSciBufAttrListSetAttrs(bufAttrs[1], + dataKeyVals, + sizeof(dataKeyVals) / + sizeof(NvSciBufAttrKeyValuePair)); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to fill data attribute list\n", sciErr); + return 0; + } + + /* + * Inform stream of the attributes + * Once sent, the attribute lists are no longer needed + */ + for (uint32_t i=0; i<2U; ++i) { + sciErr = NvSciStreamBlockElementAttrSet(prodData->common.block, + bufName[i], bufAttrs[i]); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to send element %d attribute\n", + sciErr, i); + return 0; + } + NvSciBufAttrListFree(bufAttrs[i]); + } + + /* Indicate that all element information has been exported */ + sciErr = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_ElementExport, + true); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to complete element export\n", sciErr); + return 0; + } + + return 1; +} + +/* Handle receipt of chosen element attributes */ +static int32_t handleProducerElemSetting( + ProdData* prodData) +{ + NvSciError err; + + /* + * This application does not need to query the element count, because we + * know it is always 2. But we do so anyways to show how it is done. 
+ */ + uint32_t count; + err = NvSciStreamBlockElementCountGet(prodData->common.block, + NvSciStreamBlockType_Pool, + &count); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to query element count\n", err); + return 0; + } + if (2U != count) { + printf("Producer received unexpected element count (%d)\n", count); + return 0; + } + + /* Process all elements */ + for (uint32_t i=0U; i<2U; ++i) { + + /* Query element type and attributes */ + uint32_t type; + NvSciBufAttrList bufAttr; + err = NvSciStreamBlockElementAttrGet(prodData->common.block, + NvSciStreamBlockType_Pool, i, + &type, &bufAttr); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to query element attr %d\n", err, i); + return 0; + } + + /* For data element, need to extract size and save index */ + if (ELEMENT_NAME_DATA == type) { + prodData->dataIndex = i; + NvSciBufAttrKeyValuePair keyVals[] = { + { NvSciBufRawBufferAttrKey_Size, NULL, 0 } + }; + err = NvSciBufAttrListGetAttrs(bufAttr, keyVals, 1); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to obtain buffer size\n", err); + return 0; + } + prodData->dataSize = *((const uint64_t*)(keyVals[0].value)); + + /* Set waiter attributes for the asynchronous element. */ + err = NvSciStreamBlockElementWaiterAttrSet(prodData->common.block, + i, + prodData->waiterAttr); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to send waiter attr for elem %d\n", + err, i); + return 0; + } + + /* Once sent, the waiting attributes are no longer needed */ + NvSciSyncAttrListFree(prodData->waiterAttr); + prodData->waiterAttr = NULL; + } + + /* For CRC element, just need to save the index */ + else if (ELEMENT_NAME_CRC == type) { + prodData->crcIndex = i; + + /* CRC element is a synchronous element. + * Pass NULL for the attr to indicate no sync object is needed. + * This call could be omitted since NULL is the default. */ + err = NvSciStreamBlockElementWaiterAttrSet(prodData->common.block, + i, NULL); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to send waiter attr for elem %d\n", + err, i); + return 0; + } + } + + /* Report any unknown element */ + else { + printf("Producer received unknown element type (%x)\n", type); + return 0; + } + + /* Don't need to keep attribute list */ + NvSciBufAttrListFree(bufAttr); + } + + /* Indicate that element import is complete */ + err = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_ElementImport, + true); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to complete element import\n", err); + return 0; + } + + /* Indicate that waiter attribute export is done. 
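+     * Both of these statuses can be set here because this app sends its
+     * waiter attributes while walking the element list above.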
*/ + err = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_WaiterAttrExport, + true); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to complete waiter attr export\n", err); + return 0; + } + + return 1; +} + +/* Handle creation of a new packet */ +static int32_t handleProducerPacketCreate( + ProdData* prodData) +{ + NvSciError err; + + /* Retrieve handle for packet pending creation */ + NvSciStreamPacket handle; + err = NvSciStreamBlockPacketNewHandleGet(prodData->common.block, + &handle); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to retrieve handle for the new packet\n", + err); + return 0; + } + + /* Make sure there is room for more packets */ + if (MAX_PACKETS <= prodData->numPacket) { + printf("Producer exceeded max packets\n"); + err = NvSciStreamBlockPacketStatusSet(prodData->common.block, + handle, + NvSciStreamCookie_Invalid, + NvSciError_Overflow); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to send packet status\n", err); + } + return 0; + } + + /* + * Allocate the next entry in the array for the new packet. + * Use the array entry for the cookie + */ + ProdPacket* packet = &prodData->packets[prodData->numPacket++]; + packet->handle = handle; + packet->fence = NvSciSyncFenceInitializer; + + /* Retrieve all buffers and map into application + * This use case has 2 elements. + */ + for (uint32_t index = 0; index < 2; index++) { + NvSciBufObj bufObj; + err = NvSciStreamBlockPacketBufferGet(prodData->common.block, + handle, + index, + &bufObj); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to retrieve buffer (%lx/%d)\n", + err, handle, index); + return 0; + } + + /* Handle mapping of data buffer */ + NvSciError sciErr; + int32_t cudaRtErr; + + if (index == prodData->dataIndex) { + + /* Save buffer object */ + packet->dataObj = bufObj; + + /* Map in the buffer as CUDA external memory */ + struct cudaExternalMemoryHandleDesc memHandleDesc; + memset(&memHandleDesc, 0, sizeof(memHandleDesc)); + memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; + memHandleDesc.handle.nvSciBufObject = bufObj; + memHandleDesc.size = prodData->dataSize; + cudaRtErr = cudaImportExternalMemory(&packet->dataExtMem, + &memHandleDesc); + if (cudaSuccess != cudaRtErr) { + printf("Producer failed (%d) to map buffer as external mem\n", + cudaRtErr); + return 0; + } + + /* Map in the buffer as CUDA device memory */ + struct cudaExternalMemoryBufferDesc memBufferDesc; + memset(&memBufferDesc, 0, sizeof(memBufferDesc)); + memBufferDesc.size = prodData->dataSize; + memBufferDesc.offset = 0; + cudaRtErr = cudaExternalMemoryGetMappedBuffer(&packet->dataDevMem, + packet->dataExtMem, + &memBufferDesc); + if (cudaSuccess != cudaRtErr) { + printf("Producer failed (%d) to map buffer as device mem\n", + cudaRtErr); + return 0; + } + + /* Allocate normal memory to use as the source for the CUDA op */ + packet->dataSrcMem = (uint8_t*)malloc(prodData->dataSize); + if (NULL == packet->dataSrcMem) { + printf("Producer failed to allocate source buffer\n"); + return 0; + } + + /* Fill in with initial values */ + memset(packet->dataSrcMem, 0x5A, prodData->dataSize); + + } + + /* Handle mapping of CRC buffer */ + else if (index == prodData->crcIndex) { + + /* Save buffer object */ + packet->crcObj = bufObj; + + /* Get a CPU pointer for the buffer from NvSci */ + sciErr = NvSciBufObjGetCpuPtr(bufObj, (void**)&packet->crcPtr); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to map CRC buffer\n", 
sciErr); + return 0; + } + + } + + /* Shouldn't be any other index */ + else { + printf("Producer received buffer for unknown element (%d)\n", + index); + return 0; + } + + } + + /* Inform pool of success. + * Note: Could inform the pool of any of the failures above. + */ + err = NvSciStreamBlockPacketStatusSet(prodData->common.block, + handle, + (NvSciStreamCookie)packet, + NvSciError_Success); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to inform pool of packet status\n", err); + return 0; + } + + return 1; +} + +/* Handle deletion of packet */ +static void handleProducerPacketDelete( + ProdData* prodData) +{ + /* Get the deleted packet cookie*/ + NvSciStreamCookie cookie; + NvSciError err = + NvSciStreamBlockPacketOldCookieGet(prodData->common.block, + &cookie); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to retrieve the deleted packet cookie\n", + err); + } + + /* Get packet pointer */ + ProdPacket* packet = (ProdPacket*)cookie; + + /* Free the packet resources */ + deletePacket(packet); +} + +/* Handle setup of supported sync attributes */ +static int32_t handleProducerSyncSupport( + ProdData* prodData) +{ + NvSciError sciErr; + int32_t cudaRtErr; + + /* + * Create sync attribute list for signaling. + * This will be saved until we receive the consumer's attributes + */ + sciErr = NvSciSyncAttrListCreate(sciSyncModule, &prodData->signalAttr); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to allocate signal sync attrs\n", sciErr); + return 0; + } + + /* Have CUDA fill the signaling attribute list */ + cudaRtErr = cudaDeviceGetNvSciSyncAttributes(prodData->signalAttr, + prodData->cudaDeviceId, + cudaNvSciSyncAttrSignal); + if (cudaSuccess != cudaRtErr) { + printf("Producer failed (%d) to fill signal sync attrs\n", cudaRtErr); + return 0; + } + + /* Create sync attribute list for waiting. */ + sciErr = NvSciSyncAttrListCreate(sciSyncModule, &prodData->waiterAttr); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to allocate waiter sync attrs\n", sciErr); + return 0; + } + + /* Have CUDA fill the waiting attribute list */ + cudaRtErr = cudaDeviceGetNvSciSyncAttributes(prodData->waiterAttr, + prodData->cudaDeviceId, + cudaNvSciSyncAttrWait); + if (cudaSuccess != cudaRtErr) { + printf("Producer failed (%d) to fill waiter sync attrs\n", cudaRtErr); + return 0; + } + + /* + * Most producers will only need to signal their own sync objects and + * wait for the consumer sync object(s). But to protect a local + * data buffer, this producer will also need the ability to do + * CPU waits on the sync objects it signals. 
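+     * At runtime that protection amounts to a single CPU wait on the
+     * fence from the packet's previous payload, roughly:
+     *     NvSciSyncFenceWait(&packet->fence, cpuWaitContext, -1);
+     * (see handleProducerPayload below).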
+ */ + + /* Create attribute list for CPU waiting */ + sciErr = NvSciSyncAttrListCreate(sciSyncModule, &prodData->cpuWaitAttr); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to allocate cpu wait sync attrs\n", + sciErr); + return 0; + } + + /* Fill attribute list for CPU waiting */ + uint8_t cpuSync = 1; + NvSciSyncAccessPerm cpuPerm = NvSciSyncAccessPerm_WaitOnly; + NvSciSyncAttrKeyValuePair cpuKeyVals[] = { + { NvSciSyncAttrKey_NeedCpuAccess, &cpuSync, sizeof(cpuSync) }, + { NvSciSyncAttrKey_RequiredPerm, &cpuPerm, sizeof(cpuPerm) } + }; + sciErr = NvSciSyncAttrListSetAttrs(prodData->cpuWaitAttr, cpuKeyVals, 2); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to fill cpu wait sync attrs\n", sciErr); + return 0; + } + + /* Create a context for CPU waiting */ + sciErr = NvSciSyncCpuWaitContextAlloc(sciSyncModule, + &prodData->cpuWaitContext); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to allocate cpu wait context\n", sciErr); + return 0; + } + + return 1; +} + +/* Handle creation and export of producer sync object */ +static int32_t handleProducerSyncExport( + ProdData* prodData) +{ + NvSciError sciErr; + uint32_t cudaRtErr; + + /* Process waiter attrs from all elements. + * As CRC element is a synchronous element, + * no need to query the sync object for it. + */ + NvSciSyncAttrList waiterAttr = NULL; + sciErr = NvSciStreamBlockElementWaiterAttrGet(prodData->common.block, + prodData->dataIndex, + &waiterAttr); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to query waiter attr\n", + sciErr); + return 0; + } + if (NULL == waiterAttr) { + printf("Producer received NULL waiter attr for data elem\n"); + return 0; + } + + /* Indicate that waiter attribute import is done. */ + sciErr = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_WaiterAttrImport, + true); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to complete waiter attr import\n", + sciErr); + return 0; + } + + /* + * Merge and reconcile consumer sync attrs with ours. + * Note: Many producers would only require their signaler attributes + * and the consumer waiter attributes. As noted above, we also + * add in attributes to allow us to CPU wait for the syncs that + * we signal. 
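+     * If the combined requirements cannot be satisfied,
+     * NvSciSyncAttrListReconcile() below fails and returns a separate
+     * conflict list that can help diagnose the mismatch.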
+ */ + NvSciSyncAttrList unreconciled[3] = { + prodData->signalAttr, + waiterAttr, + prodData->cpuWaitAttr }; + NvSciSyncAttrList reconciled = NULL; + NvSciSyncAttrList conflicts = NULL; + sciErr = NvSciSyncAttrListReconcile(unreconciled, 3, + &reconciled, &conflicts); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to reconcile sync attributes\n", sciErr); + return 0; + } + + /* Allocate sync object */ + sciErr = NvSciSyncObjAlloc(reconciled, &prodData->signalObj); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to allocate sync object\n", sciErr); + return 0; + } + + /* Free the attribute lists */ + NvSciSyncAttrListFree(prodData->signalAttr); + prodData->signalAttr = NULL; + NvSciSyncAttrListFree(waiterAttr); + NvSciSyncAttrListFree(reconciled); + + /* Create CUDA semaphore for sync object */ + struct cudaExternalSemaphoreHandleDesc extSemDesc; + memset(&extSemDesc, 0, sizeof(extSemDesc)); + extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; + extSemDesc.handle.nvSciSyncObj = prodData->signalObj; + cudaRtErr = cudaImportExternalSemaphore(&prodData->signalSem, + &extSemDesc); + if (cudaSuccess != cudaRtErr) { + printf("Producer failed (%d) to map signal object to semaphore\n", + cudaRtErr); + return 0; + } + + /* Only send the sync object for the asynchronous element. + * If this function is not called for an element, + * the sync object is assumed to be NULL. + * In this use case, CRC element doesn't use sync object. + */ + sciErr = NvSciStreamBlockElementSignalObjSet(prodData->common.block, + prodData->dataIndex, + prodData->signalObj); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to send sync object\n", sciErr); + return 0; + } + + /* Indicate that sync object export is complete */ + sciErr = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_SignalObjExport, + true); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to complete signal obj export\n", + sciErr); + return 0; + } + + return 1; +} + +/* Handle import of consumer sync object */ +static int32_t handleProducerSyncImport( + ProdData* prodData) +{ + uint32_t cudaRtErr; + NvSciError sciErr; + + /* Query sync objects for asynchronous elements + * from all consumers. + */ + for (uint32_t c = 0U; c < prodData->numConsumers; c++) { + NvSciSyncObj waiterObj = NULL; + sciErr = NvSciStreamBlockElementSignalObjGet( + prodData->common.block, + c, prodData->dataIndex, + &waiterObj); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to query sync obj from consumer %d\n", + sciErr, c); + return 0; + } + + /* Save object */ + prodData->waiterObj[c] = waiterObj; + + /* If the waiter sync obj is NULL, + * it means this element is ready to use when received. 
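+         * (A consumer might provide a NULL object for an element it
+         * only reads synchronously, e.g. after its own CPU wait, so it
+         * never signals fences for that element.)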
+         */
+        if (NULL != waiterObj) {
+            /* Create CUDA semaphore for sync object */
+            struct cudaExternalSemaphoreHandleDesc extSemDesc;
+            memset(&extSemDesc, 0, sizeof(extSemDesc));
+            extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
+            extSemDesc.handle.nvSciSyncObj = waiterObj;
+            cudaRtErr = cudaImportExternalSemaphore(&prodData->waiterSem[c],
+                                                    &extSemDesc);
+            if (cudaSuccess != cudaRtErr) {
+                printf("Producer failed (%d) to map waiter obj from cons %d\n",
+                       cudaRtErr, c);
+                return 0;
+            }
+        }
+    }
+
+    /* Indicate that sync object import is complete */
+    sciErr = NvSciStreamBlockSetupStatusSet(prodData->common.block,
+                                            NvSciStreamSetup_SignalObjImport,
+                                            true);
+    if (NvSciError_Success != sciErr) {
+        printf("Producer failed (%x) to complete signal obj import\n",
+               sciErr);
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Handle generation of payloads */
+static int32_t handleProducerPayload(
+    ProdData* prodData)
+{
+    NvSciError sciErr;
+    int32_t cudaErr;
+
+    /* Obtain packet for the new payload */
+    NvSciStreamCookie cookie;
+    sciErr = NvSciStreamProducerPacketGet(prodData->common.block,
+                                          &cookie);
+    if (NvSciError_Success != sciErr) {
+        if ((opts.numLateConsumer > 0U) && (sciErr == NvSciError_StreamNotConnected)) {
+            printf("[WARN] Producer failed (%x) to obtain packet for payload\n", sciErr);
+        } else {
+            printf("Producer failed (%x) to obtain packet for payload\n", sciErr);
+        }
+        return 0;
+    }
+    ProdPacket* packet = (ProdPacket*)cookie;
+
+    /*
+     * Before modifying the contents of the source buffer, make sure the
+     * previous copy from the buffer has completed. Once done, the
+     * fence can be cleared.
+     * Note: This CPU wait on the previously generated payload for this
+     *       packet is only necessary to protect the source buffer contents.
+     *       If this producer were processing data coming in from an external
+     *       source, or generating data without copying from a fixed source,
+     *       this wait would not be necessary. For most producers, it is
+     *       sufficient to have the engine wait for the consumer prefences.
+     *       However, this wait does add some throttling, preventing the
+     *       producer from issuing commands for many payloads in advance,
+     *       which can be valuable in some use cases.
+     */
+    sciErr = NvSciSyncFenceWait(&packet->fence, prodData->cpuWaitContext, -1);
+    if (NvSciError_Success != sciErr) {
+        printf("Producer failed (%x) to CPU wait for previous fence\n",
+               sciErr);
+        return 0;
+    }
+    NvSciSyncFenceClear(&packet->fence);
+
+    /*
+     * Modify 1 byte of the source.
+     * We do this mainly so we're not sending the same thing every time,
+     * but also to have an excuse to illustrate the CPU wait above,
+     * for use cases where the producer needs to wait for more than
+     * just an available buffer to continue.
+     */
+    packet->dataSrcMem[prodData->idxSrcMem] =
+        (uint8_t)(prodData->counter % 256);
+    prodData->counter++;
+    prodData->idxSrcMem = (prodData->idxSrcMem+1) % (prodData->dataSize);
+
+    /* Query fences for the data element from each consumer */
+    for (uint32_t i = 0U; i < prodData->numConsumers; i++) {
+        /* If the received waiter obj is NULL,
+         * the consumer is done using this element;
+         * skip waiting on the pre-fence.
+         */
+        if (NULL == prodData->waiterObj[i]) {
+            continue;
+        }
+
+        NvSciSyncFence prefence = NvSciSyncFenceInitializer;
+        sciErr = NvSciStreamBlockPacketFenceGet(prodData->common.block,
+                                                packet->handle,
+                                                i, prodData->dataIndex,
+                                                &prefence);
+        if (NvSciError_Success != sciErr) {
+            if ((opts.numLateConsumer > 0U) && (sciErr == NvSciError_StreamNotConnected)) {
+                printf("[WARN] Producer failed (%x) to query fence from consumer %d\n",
+                       sciErr, i);
+            } else {
+                printf("Producer failed (%x) to query fence from consumer %d\n",
+                       sciErr, i);
+            }
+            return 0;
+        }
+
+        /* Instruct CUDA to wait for each of the consumer fences */
+        struct cudaExternalSemaphoreWaitParams waitParams;
+        memset(&waitParams, 0, sizeof(waitParams));
+        waitParams.params.nvSciSync.fence = &prefence;
+        waitParams.flags = 0;
+        cudaErr = cudaWaitExternalSemaphoresAsync(
+                      &prodData->waiterSem[i],
+                      &waitParams, 1,
+                      prodData->cudaStream);
+        if (cudaSuccess != cudaErr) {
+            printf("Producer failed (%d) to wait for prefence from cons %d\n",
+                   cudaErr, i);
+            return 0;
+        }
+        NvSciSyncFenceClear(&prefence);
+    }
+
+    /* Instruct CUDA to copy the source buffer to the packet data buffer */
+    cudaErr = cudaMemcpy2DAsync(packet->dataDevMem,
+                                prodData->dataSize,
+                                packet->dataSrcMem,
+                                prodData->dataSize,
+                                prodData->dataSize,
+                                1,
+                                cudaMemcpyHostToDevice,
+                                prodData->cudaStream);
+    if (cudaSuccess != cudaErr) {
+        printf("Producer failed (%d) to issue copy command\n", cudaErr);
+        return 0;
+    }
+
+    /* Inform CUDA to signal a fence when the copy completes */
+    struct cudaExternalSemaphoreSignalParams signalParams;
+    memset(&signalParams, 0, sizeof(signalParams));
+    signalParams.params.nvSciSync.fence = &packet->fence;
+    signalParams.flags = 0;
+    cudaErr = cudaSignalExternalSemaphoresAsync(&prodData->signalSem,
+                                                &signalParams,
+                                                1,
+                                                prodData->cudaStream);
+    if (cudaSuccess != cudaErr) {
+        printf("Producer failed (%d) to signal postfence\n", cudaErr);
+        return 0;
+    }
+
+    /* Generate a checksum and save it to the CRC buffer of the packet */
+    *((uint32_t*)(packet->crcPtr)) = generateCRC(packet->dataSrcMem,
+                                                 1,
+                                                 prodData->dataSize,
+                                                 prodData->dataSize);
+
+    /* Update postfence for data element */
+    sciErr = NvSciStreamBlockPacketFenceSet(prodData->common.block,
+                                            packet->handle,
+                                            prodData->dataIndex,
+                                            &packet->fence);
+    if (NvSciError_Success != sciErr) {
+        if ((opts.numLateConsumer > 0U) && (sciErr == NvSciError_StreamNotConnected)) {
+            printf("[WARN] Producer failed (%x) to set postfence\n", sciErr);
+        } else {
+            printf("Producer failed (%x) to set postfence\n", sciErr);
+        }
+        return 0;
+    }
+
+    /* Send the new payload to the consumer(s) */
+    sciErr = NvSciStreamProducerPacketPresent(prodData->common.block,
+                                              packet->handle);
+    if (NvSciError_Success != sciErr) {
+        if ((opts.numLateConsumer > 0U) && (sciErr == NvSciError_StreamNotConnected)) {
+            printf("[WARN] Producer failed (%x) to present packet\n", sciErr);
+        } else {
+            printf("Producer failed (%x) to present packet\n", sciErr);
+        }
+        return 0;
+    }
+
+    /* If counter has reached the limit, indicate finished */
+    if (prodData->counter == prodData->framesDesired) {
+        /* Make sure all operations have been completed
+         * before resource cleanup.
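+         * (Unlike the infinite (-1) wait used per payload above, this
+         * final wait uses a large finite timeout, so a stalled stream
+         * eventually fails here instead of blocking teardown forever.)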
+ */ + sciErr = NvSciSyncFenceWait(&packet->fence, + prodData->cpuWaitContext, + 0xFFFFFFFF); + if (NvSciError_Success != sciErr) { + printf("Producer failed (%x) to wait for all operations done\n", + sciErr); + return 0; + } + prodData->finished = 1; + } + + return 1; +} + +/* Handle events on a producer block + * + * The producer block informs the stream of the producers buffer and + * synchronization requirements and capabilities, creates signaling + * synchronization objects and receives synchronization objects to + * wait for, maps buffers and synchronization objects to the producer + * engine(s), and generates data. + */ +static int32_t handleProducer( + void* data, + uint32_t wait) +{ + /* Cast to producer data */ + ProdData* prodData = (ProdData*)data; + + /* Get time to wait */ + int64_t waitTime = wait ? prodData->common.waitTime : 0; + + /* Query/wait for an event on the block */ + + NvSciStreamEventType event; + NvSciError err; + err = NvSciStreamBlockEventQuery(prodData->common.block, waitTime, &event); + + /* Handle errors */ + if (NvSciError_Success != err) { + + /* If not asked to wait, a timeout is not an error */ + if (!waitTime && (NvSciError_Timeout == err)) { + return 0; + } + + /* Otherwise, any error is considered fatal. A timeout probably + * indicates a failure to connect and complete setup in a timely + * fashion, so we specifically call out this case. + */ + if (NvSciError_Timeout == err) { + printf("Producer timed out waiting for setup instructions\n"); + } else { + printf("Producer event query failed with error %x\n", err); + } + deleteProducer(prodData); + return -1; + } + + /* If we received an event, handle it based on its type + * + * Note that there's a lot of error checking we could choose to do for + * some of these events, like making sure that we only receive each + * event once for a given entry. But NvSciStream is expected to take + * care of all of that, even when the application makes a mistake. + * So we only check for things that don't trigger NvSciStream errors. + */ + int32_t rv = 1; + NvSciError status; + switch (event) { + /* + * Any event we don't explicitly handle is a fatal error + */ + default: + printf("Producer received unknown event %x\n", event); + + rv = -1; + break; + + /* + * Error events should never occur with safety-certified drivers, + * and are provided only in non-safety builds for debugging + * purposes. Even then, they should only occur when something + * fundamental goes wrong, like the system running out of memory, + * or stack/heap corruption, or a bug in NvSci which should be + * reported to NVIDIA. + */ + case NvSciStreamEventType_Error: + err = NvSciStreamBlockErrorGet(prodData->common.block, &status); + if (NvSciError_Success != err) { + printf("%s Failed to query the error event code %x\n", + prodData->common.name, err); + } else { + printf("%s received error event: %x\n", + prodData->common.name, status); + } + + rv = -1; + break; + + /* + * If told to disconnect, it means either the stream finished its + * business or some other block had a failure. We'll just do a + * clean up and return without an error. + */ + case NvSciStreamEventType_Disconnected: + printf("Producer disconnected after sending %d payloads\n", prodData->counter); + rv = 2; + break; + + /* + * On connection, the producer should initialize the appopriate engine(s) + * and obtain the necessary buffer and synchronization attribute lists + * for the desired use case. 
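+     * Under normal operation the remaining setup events then arrive
+     * roughly in the order handled below: Elements, PacketCreate (per
+     * packet), PacketsComplete, WaiterAttr, SignalObj and
+     * SetupComplete, followed by PacketReady for each payload.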
+ */ + case NvSciStreamEventType_Connected: + + /* Initialize basic stream info */ + if (!handleStreamInit(prodData)) { + rv = -1; + } + /* Initialize CUDA access */ + else if (!handleProducerInit(prodData)) { + rv = -1; + } + /* Determine supported buffer attributes */ + else if (!handleProducerElemSupport(prodData)) { + rv = -1; + } + /* Determined supported sync attributes */ + else if (!handleProducerSyncSupport(prodData)) { + rv = -1; + } + + /* Now that we're fully connected, set the wait time to infinite */ + prodData->common.waitTime = -1; + break; + + /* Retrieve all element information from pool */ + case NvSciStreamEventType_Elements: + if (!handleProducerElemSetting(prodData)) { + rv = -1; + } + break; + + /* For a packet, set up an entry in the array */ + case NvSciStreamEventType_PacketCreate: + if (!handleProducerPacketCreate(prodData)) { + rv = -1; + } + break; + + /* Finish any setup related to packet resources */ + case NvSciStreamEventType_PacketsComplete: + /* For this use case, nothing else to setup. + * Inform the NvSciStream that the producer has imported all packets. + */ + err = NvSciStreamBlockSetupStatusSet(prodData->common.block, + NvSciStreamSetup_PacketImport, + true); + if (NvSciError_Success != err) { + printf("Producer failed (%x) to complete packet import\n", err); + rv = -1; + } + break; + + /* Delete a packet - usually only relevant for non-safety applications */ + case NvSciStreamEventType_PacketDelete: + handleProducerPacketDelete(prodData); + break; + + case NvSciStreamEventType_WaiterAttr: + if (!handleProducerSyncExport(prodData)) { + rv = -1; + } + break; + + /* Import consumer sync objects for all elements */ + case NvSciStreamEventType_SignalObj: + if (!handleProducerSyncImport(prodData)) { + rv = -1; + } + break; + + /* All setup complete. Transition to runtime phase */ + case NvSciStreamEventType_SetupComplete: + printf("Producer setup completed\n"); + break; + + /* Generate payloads when packets are available */ + case NvSciStreamEventType_PacketReady: + if (!handleProducerPayload(prodData)) { + rv = -1; + } else if (prodData->finished) { + printf("Producer finished sending %d payloads\n", prodData->counter); + rv = 2; + } + break; + } + + /* On failure or final event, clean up the block */ + if ((rv < 0) || (1 < rv)) { + if ((rv < 0) && (opts.numLateConsumer > 0U)) { + printf("Producer disconnected after sending %d payloads\n", prodData->counter); + rv = 2; + } + deleteProducer(prodData); + } + + return rv; +} + +/* Create and register a new producer block */ +int32_t createProducer_Usecase1( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames) +{ + /* Create a data structure to track the block's status */ + ProdData* prodData = (ProdData*)calloc(1, sizeof(ProdData)); + if (NULL == prodData) { + printf("Failed to allocate data structure for producer\n"); + return 0; + } + + /* Save the name for debugging purposes */ + strcpy(prodData->common.name, "Producer"); + + /* Wait time for initial connection event will be 60 seconds */ + prodData->common.waitTime = 60 * 1000000; + prodData->framesDesired = numFrames; + + /* Create a producer block */ + NvSciError err = + NvSciStreamProducerCreate(pool, &prodData->common.block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create producer block\n", err); + deleteProducer(prodData); + return 0; + } + + if (opts.endInfo) { + /* Add endpoint information on producer side. 
+         * Application can specify user-defined info to help set up the
+         * stream, which can be queried by other blocks after stream
+         * connection.
+         */
+        char info[INFO_SIZE] = {0};
+        size_t infoSize =
+            snprintf(info, INFO_SIZE, "%s%d", "Producer proc: ", getpid());
+        err = NvSciStreamBlockUserInfoSet(prodData->common.block,
+                                          ENDINFO_NAME_PROC,
+                                          infoSize, info);
+        if (NvSciError_Success != err) {
+            printf("Failed (%x) to setup the producer info\n", err);
+            deleteProducer(prodData);
+            return 0;
+        }
+    }
+
+    /* Register block with event handling mechanism */
+    if (!eventFuncs->reg(prodData->common.block, prodData, handleProducer)) {
+        deleteProducer(prodData);
+        return 0;
+    }
+
+    *producer = prodData->common.block;
+
+    return 1;
+}
diff --git a/event_sample_app/block_queue.c b/event_sample_app/block_queue.c
new file mode 100644
index 0000000..47f71b2
--- /dev/null
+++ b/event_sample_app/block_queue.c
@@ -0,0 +1,57 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - queue block
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include "nvscistream.h"
+#include "block_info.h"
+#include "event_loop.h"
+
+/*
+ * Queue blocks do not require any block-specific interactions so we
+ * use the set of common functions to handle their events.
+ */
+
+/* Create and register a new queue block */
+int32_t createQueue(
+    NvSciStreamBlock* queue,
+    uint32_t useMailbox)
+{
+    /* Create a data structure to track the block's status */
+    BlockData* blockData = createCommon(useMailbox ? "Mailbox" : "FIFO", 0);
+    if (NULL == blockData) {
+        return 0;
+    }
+
+    /* Create a queue block */
+    NvSciError err = useMailbox
+                   ? NvSciStreamMailboxQueueCreate(&blockData->block)
+                   : NvSciStreamFifoQueueCreate(&blockData->block);
+    if (NvSciError_Success != err) {
+        printf("Failed (%x) to create queue block\n", err);
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    /* Register block with event handling mechanism */
+    if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) {
+        deleteCommon(blockData);
+        return 0;
+    }
+
+    *queue = blockData->block;
+    return 1;
+}
diff --git a/event_sample_app/block_returnsync.c b/event_sample_app/block_returnsync.c
new file mode 100644
index 0000000..3511cb1
--- /dev/null
+++ b/event_sample_app/block_returnsync.c
@@ -0,0 +1,55 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */ + +/* + * NvSciStream Event Loop Driven Sample App - ReturnSync block + */ + +#include +#include +#include "nvscistream.h" +#include "block_info.h" +#include "event_loop.h" + +/* + * ReturnSync blocks do not require any block-specific interactions so we + * use the set of common functions to handle its events. + */ + +/* Create and register a new returnSync block */ +int32_t createReturnSync( + NvSciStreamBlock* returnSync) +{ + /* Create a data structure to track the block's status */ + BlockData* blockData = createCommon("ReturnSync", 0); + if (NULL == blockData) { + return 0; + } + + /* Create a ReturnSync block */ + NvSciError err = + NvSciStreamReturnSyncCreate(sciSyncModule, &blockData->block); + if (NvSciError_Success != err) { + printf("Failed (%x) to create ReturnSync block\n", err); + deleteCommon(blockData); + return 0; + } + + /* Register block with event handling mechanism */ + if (!eventFuncs->reg(blockData->block, blockData, handleCommon)) { + deleteCommon(blockData); + return 0; + } + + *returnSync = blockData->block; + return 1; +} diff --git a/event_sample_app/event_loop.h b/event_sample_app/event_loop.h new file mode 100644 index 0000000..7d5e43c --- /dev/null +++ b/event_sample_app/event_loop.h @@ -0,0 +1,51 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - event handler abstraction + */ + +#ifndef _EVENT_LOOP_H +#define _EVENT_LOOP_H 1 + +#include +#include "nvscistream.h" + +/* + * Block event handler function. + * Input: + * data: The block's type-specific private data structure + * wait: Flag indicating whether to wait for an event + * Returns: + * +2: Block has completed its last operation and will be destroyed + * +1: An event was found and processed + * 0: No event was found (not an error) + * -1: Block has encountered a fatal error and will be destroyed + */ +typedef int32_t (*BlockFunc)(void* data, uint32_t wait); + +/* Table of events to abstract the two approaches for event loops */ +typedef struct { + int32_t (*init)(void); + int32_t (*reg)(NvSciStreamBlock, void*, BlockFunc); + int32_t (*regInt)(NvSciStreamBlock); + int32_t (*loop)(void); +} EventFuncs; + +/* Chosen event function table */ +extern EventFuncs const* eventFuncs; + +/* Event tables for the two methods */ +extern EventFuncs const eventFuncs_Service; +extern EventFuncs const eventFuncs_Threads; + +#endif // _EVENT_LOOP_H diff --git a/event_sample_app/event_loop_service.c b/event_sample_app/event_loop_service.c new file mode 100644 index 0000000..48aeee3 --- /dev/null +++ b/event_sample_app/event_loop_service.c @@ -0,0 +1,339 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. 
Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - service-based event handling + * + * This file implements the option to handle events for all blocks + * through an event service. Each block adds an event notifier to + * a list. That notifier will be signaled when an event is ready + * on the block. A single main loop waits for one or more of the + * notifiers to trigger, processes events on the corresponding + * blocks, and goes back to waiting. When all blocks have been + * destroyed either due to failure or all payloads being processed, + * the loop exits and the function returns. + */ + +#include +#include +#include +#include +#if (QNX == 1) +#include +#endif +#include "nvscievent.h" +#include "block_info.h" +#include "event_loop.h" + +/* Event service */ +static NvSciEventLoopService* service = NULL; + +/* List of blocks */ +#define MAX_INTERNAL_NOTIFIERS 10 +#define MAX_NOTIFIERS MAX_BLOCKS + MAX_INTERNAL_NOTIFIERS + +int32_t numBlocks = 0U; +uint32_t numAlive = 0U; +static uint32_t numIntNotifiers = 0U; +static int32_t numNotifiers = 0U; + +BlockEventData blocks[MAX_BLOCKS]; +BlockEventData* blocksAlive[MAX_BLOCKS]; +static NvSciEventNotifier* intNotifiers[MAX_INTERNAL_NOTIFIERS]; + +static uint32_t success = 1U; + +/* Initialize service-based event handling */ +static int32_t eventServiceInit(void) +{ + /* + * The OS configuration should be NULL for Linux and should + * have a valid configuration for QNX. + * See NvSciEventLoopServiceCreateSafe API Specification for more + * information. + */ + void *osConfig = NULL; + +#if (QNX == 1) + struct nto_channel_config config = {0}; + + /* + * The number of pulses could be calculated based on the + * number of notifiers bind to the event service, number of packets and + * number of events handled by each block. + * (num_of_pulses = num_of_notifiers * 4 + \ + * (num_packets + 5) * num_of_endpoints) + * If experienced pulse pool shortage issue in normal operation, increase + * the number of pulses. + * If there are no available pulses in the pool, SIGKILL is delivered + * by default. You may configure the sigevent that you want to be + * delivered when a pulse can't be obtained from the pool. + * + * See NvSciEventLoopServiceCreateSafe API Specification for more + * information. + */ + + /* The num_pulses set below is just an example number and should be + * adjusted depending on the use case. 
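+     * For example, with 10 notifiers, 3 packets and 2 endpoints, the
+     * formula above gives 10 * 4 + (3 + 5) * 2 = 56 pulses, so the
+     * 1024 configured here leaves a wide margin.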
+ */ + config.num_pulses = 1024U; + config.rearm_threshold = 0; + osConfig = &config; +#endif + + /* Create event loop service */ + NvSciError err = NvSciEventLoopServiceCreateSafe(1U, osConfig, &service); + if (NvSciError_Success != err) { + printf("Failed (%x) to create event service\n", err); + return 0; + } + + return 1; +} + +/* Register a new block with the event management */ +static int32_t eventServiceRegister( + NvSciStreamBlock blockHandle, + void* blockData, + BlockFunc blockFunc) +{ + /* Sanity check to make sure we left room for enough blocks */ + if (numBlocks >= MAX_BLOCKS) { + printf("Exceeded maximum number of blocks\n"); + return 0; + } + + /* Grab the next entry in the list for the new block and fill it in */ + BlockEventData* entry = &blocks[numBlocks++]; + entry->handle = blockHandle; + entry->data = blockData; + entry->func = blockFunc; + entry->isAlive = true; + entry->retry = false; + + /* Create a notifier for events on this block */ + NvSciError err = + NvSciStreamBlockEventServiceSetup(entry->handle, + &service->EventService, + &entry->notifier); + + if (NvSciError_Success != err ) { + printf("Failed (%x) to create event notifier for block\n", err); + return 0; + } + + return 1; +} + +/* Register a new block with the event management to handle internal event. + * + * It's only supported on IpcSrc/IpcDst blocks now. + * + * Without user-provided event service, each IpcSrc/IpcDst block creates + * an internal event service and spawns a dispatch thread to handle the + * internal I/O messages. + * + * With the user-provided event service, no internal thread will be created. + * The application needs to wait for events on these internal notifiers. + * When there's a new notification on the internal notifiers, it will + * trigger the NvSciStream callback function automatically. + * + * The application can bind the internal notifiers and the external + * notifiers, which is used to monitor the NvSciStreamEvent on the block, + * to the same event service or different ones. In this sample app, we + * bind them to the same event service and use one thread to handle all + * the events. + */ +static int32_t eventServiceInternalRegister( + NvSciStreamBlock blockHandle) +{ + /* Gets notifiers for internal events on this block */ + numIntNotifiers = MAX_INTERNAL_NOTIFIERS; + NvSciError err = + NvSciStreamBlockInternalEventServiceSetup( + blockHandle, + &service->EventService, + &numIntNotifiers, + intNotifiers); + + if (NvSciError_Success != err) { + printf("Failed (%x) to setup internal event service for block\n", err); + return 0; + } + + /* Sanity check to make sure we left room for enough internal notifiers */ + if (numIntNotifiers >= MAX_INTERNAL_NOTIFIERS) { + printf("Exceeded maximum number of internal notifiers\n"); + return 0; + } + + return 1; +} + +/* Main service-based event loop */ +static int32_t eventServiceLoop(void) +{ + int32_t i; + int32_t k; + + /* + * Notes on handling notificiations: + * If more than one signal occurs on a notifier in between calls + * to check for events, then NvSciEvent will squash the notifications, + * so only one is received. This means the application must drain + * all pending events on a block after its notifier signals. It won't + * receive new notifications for those pending events. + * A simple implementation might process each block's events in a loop + * until there are no more, and then move on to the next block. But + * this poses a risk of starvation. 
Consider the case of a stream in
+     * mailbox mode, where the mailbox already has a waiting payload.
+     * If the producer receives a PacketReady event, it will obtain
+     * the packet, fill it with data, and present it to the stream.
+     * Because the mailbox is full, the packet will immediately be
+     * returned, resulting in a new PacketReady event. The application
+     * could go into an infinite loop, generating new payloads on the
+     * producer without giving the consumer a chance to process them.
+     * We therefore use an event loop that only processes one event
+     * per block for each iteration, but keeps track of whether there
+     * was an event on a block on the previous pass, and if so retries
+     * it even if no new signal occurred. The event loop waits for
+     * events only when there was no prior event; otherwise it only
+     * polls for new ones.
+     * For internal notifiers, the handler is registered by NvSciStream
+     * when the notifiers are created and is triggered automatically
+     * when a new event arrives. The application only needs to wait
+     * for these events; it does not handle them itself.
+     */
+
+    /* Pack all notifiers into an array */
+    NvSciEventNotifier* notifiers[MAX_NOTIFIERS];
+
+    /* Initialize loop control parameters */
+    int64_t timeout = 1000000;
+    bool event[MAX_NOTIFIERS];
+    uint32_t numAliveBlocks;
+
+    numAlive = numBlocks;
+
+    /* Main loop - Handle events until all blocks report completion or fail */
+    while (numAlive && !atomic_load(&streamDone)) {
+
+        numNotifiers = 0;
+        numAliveBlocks = 0;
+
+        /* Acquire the lock */
+        pthread_mutex_lock(&mutex);
+        /* Pack the external notifiers for the live blocks, so that
+         * notifiers[i] matches blocksAlive[i] in the loop below */
+        for (i=0; i<numBlocks; ++i) {
+            if (blocks[i].isAlive) {
+                blocksAlive[numAliveBlocks] = &blocks[i];
+                notifiers[numAliveBlocks] = blocks[i].notifier;
+                numAliveBlocks++;
+            }
+        }
+        numNotifiers = (int32_t)numAliveBlocks;
+
+        /* Pack the internal notifiers after the external ones */
+        for (k=0; k<(int32_t)numIntNotifiers; ++k) {
+            notifiers[numNotifiers++] = intNotifiers[k];
+        }
+        /* Release the lock */
+        pthread_mutex_unlock(&mutex);
+
+        /* Wait/poll for events on all notifiers */
+        NvSciError err = service->WaitForMultipleEventsExt(
+                             &service->EventService,
+                             notifiers,
+                             numNotifiers,
+                             timeout,
+                             event);
+
+        if ((NvSciError_Success != err) && (NvSciError_Timeout != err)) {
+            printf("Failure (%x) while waiting/polling event service\n", err);
+            return 0;
+        }
+
+        /* Reset the timeout for the next pass; it is shortened to a
+         * poll below if any block needs a retry */
+        timeout = 1000000;
+
+        /*
+         * Check for events on new blocks that signaled or old blocks that
+         * had an event on the previous pass. This is done in reverse
+         * of the order in which blocks were registered. This is because
+         * producers are created before consumers, and for mailbox mode
+         * we want to give the consumer a chance to use payloads before
+         * the producer replaces them.
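+         * As each handler runs, its return value follows the BlockFunc
+         * convention from event_loop.h: -1 fatal error, 0 no event,
+         * +1 an event was handled (worth a retry), +2 block finished.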
+ */ + for (i=numAliveBlocks-1; ((i>=0) && (!atomic_load(&streamDone))); --i) { + /* Get block info */ + BlockEventData* entry = blocksAlive[i]; + if (entry != NULL) { + if (event[i] || entry->retry) { + + /* Reset to no retry for next pass */ + entry->retry = false; + + /* Skip if this block is no longer in use */ + if (entry->data) { + + /* Call the block's event handler function */ + int32_t rv = entry->func(entry->data, 0); + if (rv < 0) { + /* On failure, no longer check block and app failed */ + success = 0U; + entry->data = NULL; + numAlive--; + } else if (rv == 2) { + /* On completion, no longer check block */ + entry->isAlive = false; + entry->data = NULL; + numAlive--; + } else if (rv == 1) { + /* If event found, retry next loop */ + timeout = 0; + entry->retry = true; + } + } + } + } + } + } + + + /* Delete internal notifiers */ + for (uint32_t j=0; jDelete(intNotifiers[j]); + } + + /* Delete notifiers */ + for (i=0; iDelete(blocks[i].notifier); + } + + /* Delete service */ + service->EventService.Delete(&service->EventService); + + return success; +} + +/* Table of functions for service-based event handling */ +EventFuncs const eventFuncs_Service = { + .init = eventServiceInit, + .reg = eventServiceRegister, + .regInt = eventServiceInternalRegister, + .loop = eventServiceLoop +}; diff --git a/event_sample_app/event_loop_threads.c b/event_sample_app/event_loop_threads.c new file mode 100644 index 0000000..29eb0a2 --- /dev/null +++ b/event_sample_app/event_loop_threads.c @@ -0,0 +1,129 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - thread-based event handling + * + * This file implements the option to handle events for each block in + * a separate thread. Each thread just has a loop that waits for a + * block event to occur and handles it appropriately, until the block + * has performed all required actions or receives notification that + * the stream has disconnected. + * + * In practice, only a few block types (producer, consumer, and pool) + * receive any events that need to be handled. So a more streamlined + * application might choose to only monitor them, assuming that the + * other blocks can be left alone until the time comes to tear them + * down. + * + * Note: We use standard pthread functions here because it allows this + * sample to run on all operating systems. QNX has its own thread + * management functions which might be more efficient when using + * this approach. 
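+ * (Each registered block gets its own thread below; the alternative in
+ * event_loop_service.c multiplexes every block onto a single loop.)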
+ */ + +#include +#include +#include +#include "event_loop.h" + +/* Structure to track block info */ +typedef struct { + NvSciStreamBlock handle; + void* data; + BlockFunc func; + pthread_t thread; +} BlockEventData; + +/* List of blocks */ +#define MAX_BLOCKS 100U +static uint32_t numBlocks = 0U; +static BlockEventData blocks[MAX_BLOCKS]; +static uint32_t success = 1U; + +/* The per-thread loop function for each block */ +static void* eventThreadFunc(void* arg) +{ + /* Simple loop, waiting for events on the block until the block is done */ + BlockEventData* entry = (BlockEventData*)arg; + while (1) { + int32_t rv = entry->func(entry->data, 1); + if (rv < 0) { + success = 0U; + break; + } else if (rv == 2) { + break; + } + } + return NULL; +} + +/* Initialize per-thread event handling */ +static int32_t eventThreadInit(void) +{ + /* No special initialization required for this method */ + return 1; +} + +/* Register a new block with the event management */ +static int32_t eventThreadRegister( + NvSciStreamBlock blockHandle, + void* blockData, + BlockFunc blockFunc) +{ + /* Sanity check to make sure we left room for enough blocks */ + if (numBlocks >= MAX_BLOCKS) { + printf("Exceeded maximum number of blocks\n"); + return 0; + } + + /* Grab the next entry in the list for the new block and fill it in */ + BlockEventData* entry = &blocks[numBlocks++]; + entry->handle = blockHandle; + entry->data = blockData; + entry->func = blockFunc; + + /* Spawn a thread */ + int32_t rv = pthread_create(&entry->thread, + NULL, + eventThreadFunc, + (void*)entry); + if (rv != 0) { + printf("Failed to spawn thread to monitor block\n"); + return 0; + } + + return 1; +} + +/* Main per-thread event loop */ +static int32_t eventThreadLoop(void) +{ + /* + * Each block has its own thread loop. This main function just needs + * to wait for all of them to exit, and then return any error. This + * waiting can be done in any order. 
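+     * A minimal sketch of that wait, assuming each entry keeps the
+     * pthread_t recorded at registration:
+     *     for (uint32_t i = 0; i < numBlocks; ++i)
+     *         (void)pthread_join(blocks[i].thread, NULL);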
+ */ + for (uint32_t i=0; i +#include +#if (QNX == 1) +#include +#endif +#include "nvscisync.h" +#include "nvscibuf.h" +#include "nvsciipc.h" +#include "nvscistream.h" +#include "event_loop.h" +#include "block_info.h" +#include +#include + +/* Base name for all IPC channels */ +static const char ipcBaseName[] = "nvscistream_"; + +/* Event handling function table */ +EventFuncs const* eventFuncs = NULL; + +/* Top level use-case setup function pointers */ +int32_t (*createProducer)( + NvSciStreamBlock* producer, + NvSciStreamBlock pool, + uint32_t numFrames) = createProducer_Usecase1; +int32_t (*createConsumer)( + NvSciStreamBlock* consumer, + NvSciStreamBlock queue, + uint32_t index, + uint32_t frames) = createConsumer_Usecase1; + +int32_t(*createPool)( + NvSciStreamBlock* pool, + uint32_t numPacket, + bool isC2cPool) = createPool_Common; + + +/* NvSci modules */ +NvSciSyncModule sciSyncModule; +NvSciBufModule sciBufModule; + +/* Flag used to terminate the thread that +* was spawned to handle the late/re-attached +* consumer connections upon stream disconnect +*/ +atomic_int streamDone; + +/* Holds the multicast block handle for +* late/re-attach usecase +*/ +static NvSciStreamBlock multicastBlock = 0U; + +/* Dispatch thread for handling late/re-attach +* consumer connections +*/ +pthread_t dispatchThread; + +/* Endpoint status structure*/ +Endpoint ipcEP[MAX_CONSUMERS]; + +/* pthread variables for thread synchronization */ +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; + +#if (QNX == 1) +/* Data needed for QNX channel connection */ +int8_t DELTA = 1; +int8_t ipcCode = (_PULSE_CODE_MINAVAIL + 1); +#endif +/* Common options for all blocks */ +CommonOptions opts; + +/* Options for producer */ +typedef struct { + uint32_t resident; + uint32_t numConsumer; + uint32_t numPacket; + uint32_t numFrames; + uint32_t usecase; +} ProducerOptions; + +/* Options for each consumer */ +typedef struct { + uint32_t resident; + uint32_t useMailbox; + uint32_t useLimiter; + uint32_t c2cMode; + uint32_t c2cSrcUseMailbox; + uint32_t c2cDstNumPacket; + char srcChannel[32]; + char dstChannel[32]; + char ipcChannelForHandshake[32]; + uint32_t numFrames; +} ConsumerOptions; + +ConsumerOptions consOpts[MAX_CONSUMERS]; + +/* Print command line options */ +static void print_usage(const char *str) +{ + printf("%s [options]\n", str); + printf(" For single- or inter-process/chip operation:\n"); + printf(" -m [default 1, max %d]\n", MAX_CONSUMERS); + printf(" number of multicast consumers\n"); + printf(" (ignored if process doesn't own producer\n"); + printf(" -f [default 3]\n"); + printf(" number of packets in main pool\n"); + printf(" (ignored if process doesn't own producer\n"); + printf(" -l [default - not used]\n"); + printf(" use limiter block for indexed consumer\n"); + printf(" (ignored if process doesn't own producer\n"); + printf(" -q {f|m} [default f]\n"); + printf(" use fifo (f) or maibox (m) for indexed consumer\n"); + printf(" (ignored if process doesn't own the indexed consumer\n"); + printf(" -e {s|t} [default s]\n"); + printf(" s : events are handled through a single service\n"); + printf(" t : events are handled with separate per-block threads\n"); + printf(" -E\n"); + printf(" Use the user-provided event service to handle internal I/O " + "messages on ipc blocks.\n"); + printf(" Only Supported with event service\n"); +#if (NV_SUPPORT_NVMEDIA == 1) + printf(" -s {y|r} [default r]\n"); + printf(" y : NvSciColor_Y8U8Y8V8 Image Color Format in use case 
2\n"); + printf(" r : NvSciColor_A8R8G8B8 Image Color Format in use case 2\n"); +#endif + printf(" -u [default 1]\n"); + printf(" use case (must be same for all processes)\n"); + printf(" 1 : CUDA (rt) producer to CUDA (rt) consumer\n"); +#if (NV_SUPPORT_NVMEDIA == 1) + printf(" 2 : NvMedia producer to CUDA (rt) consumer\n"); +#endif +#if (NV_SUPPORT_ASILD == 1) + printf(" 3 : CUDA (rt) producer to CUDA (rt) consumer " + "in ASIL-D process, not supported in C2C.\n"); +#endif + printf(" -i [default - not used]\n"); + printf(" set endpoint info and query info from other endpoints\n"); + printf(" For inter-process operation:\n"); + printf(" -p\n"); + printf(" producer resides in this process\n"); + printf(" -c \n"); + printf(" indexed consumer resides in this process\n"); + printf(" For inter-chip (C2C) operation:\n"); + printf(" -P \n"); + printf(" producer resides in this process\n"); + printf(" Ipc endpoint used by the producer to communicate with the " + "indexed chip-to-chip (C2C) consumer\n"); + printf(" User must provide all the C2C endpoints required to " + "communicate with the total number of consumers for C2C " + "usecase when late-/reattach is chosen.\n"); + printf(" -C \n"); + printf(" indexed consumer resides in this process\n"); + printf(" Ipc endpoint used by this chip-to-chip (C2C) consumer\n"); + printf(" -C and -c can't be used simultaneously.\n"); + printf(" (ignored if process owns producer)\n"); + printf(" -F [default 3]\n"); + printf(" number of packets in pool attached to the IpcDst block " + "of the indexed C2C consumer\n"); + printf(" set along with the indexed C2C consumer.\n"); + printf(" (ignored if process doesn't own indexed C2C consumer)\n"); + printf(" -Q {f|m} [default f]\n"); + printf(" use fifo (f) or maibox (m) for C2C IpcSrc of indexed " + "consumer.\n"); + printf(" Can't specify same index as -c)\n"); + printf(" set in the producer process.\n"); + printf(" (ignored if process doesn't own producer)\n"); + printf(" -r [default 0]\n"); + printf(" Number of late-attach consumers\n"); + printf(" set in the producer process and currently supported " + "for usecase1.\n"); + printf(" -L\n"); + printf(" set in the consumer process to indicate the consumer " + "connection is late/re-attach.\n"); + printf(" -k [default 0]\n"); + printf(" Number of frames expected to be received by the indexed consumer\n"); + printf(" -n [default 32]\n"); + printf(" Number of frames expected to be produced by the producer\n"); + printf(" With -r option is specified, the default value is set to 100000\n"); + printf(" With -r option is NOT specified, the default value is set to 32\n"); +} + +/* Deletes the block that are created for handling late-/re-attach +* connection when late/re-attach consumer connection fails. +*/ +static void deleteBlock(NvSciStreamBlock block) +{ + for (int32_t i=0; i< numBlocks; i++) { + BlockEventData* entry = &blocks[i]; + if (entry->handle == block) { + deleteCommon(entry->data); + } + } +} + +/* Function to handle the opening of IPC endpoint +* for handshaking. 
+*/ +static bool openIpcEndpoint(uint32_t index, bool isConsumer) +{ + NvSciError err; + if (!isConsumer) { + err = NvSciIpcOpenEndpoint(ipcEP[index].ipcChannelForHandshake, + &ipcEP[index].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to open channel (%s) for IpcSrc\n", + err, ipcEP[index].ipcChannelForHandshake); + return false; + } + } else { + err = NvSciIpcOpenEndpoint(consOpts[index].ipcChannelForHandshake, + &ipcEP[index].ipcEndpoint); + if (NvSciError_Success != err) { + printf("Failed (%x) to open channel (%s) for IpcDst\n", + err, consOpts[index].ipcChannelForHandshake); + return false; + } + } + +#if (QNX == 1) + ipcEP[index].chid = ChannelCreate_r(_NTO_CHF_UNBLOCK | _NTO_CHF_PRIVATE); + if (ipcEP[index].chid < 0) { + printf("ChannelCreate_r: fail for connection index=%d \n", index); + return false; + } + + ipcEP[index].coid = ConnectAttach_r(0, 0, ipcEP[index].chid, + _NTO_SIDE_CHANNEL, _NTO_COF_CLOEXEC); + if (ipcEP[index].coid < 0) { + printf("ConnectAttach_r: fail for connection index=%d\n", index); + return false; + } + err = NvSciIpcSetQnxPulseParamSafe(ipcEP[index].ipcEndpoint, ipcEP[index].coid, + SIGEV_PULSE_PRIO_INHERIT, + ipcCode); + if (err != NvSciError_Success) { + printf("NvSciIpcSetQnxPulseParamSafe(%x) failed for connection index=%d\n", + err, index); + return false; + } +#endif + + err = NvSciIpcResetEndpointSafe(ipcEP[index].ipcEndpoint); + if (err != NvSciError_Success) { + printf("NvSciIpcResetEndpointSafe(%x) failed for connection index=%d\n", + err, index); + return false; + } + + return true; +} + +/* Function to handle the IPC connection for handshaking */ +static bool waitForIpcConnection(uint32_t index, bool isConsumer) +{ + NvSciError err; + bool retry = true; + + while(retry) { + uint32_t receivedEvents = 0U; + err = NvSciIpcGetEventSafe(ipcEP[index].ipcEndpoint, &receivedEvents); + if (NvSciError_Success != err) { + atomic_store(&streamDone, 1); + printf("Failed (%x) to retrieve IPC events for connection index=%d\n", + err, index); + return false; + } + /* No need to retry if it is a producer */ + if(!isConsumer) { + retry = false; + } + + if (receivedEvents & (NV_SCI_IPC_EVENT_CONN_EST_ALL)) { +#if (QNX == 1) + if (ipcEP[index].coid != 0) { + (void)ConnectDetach_r(ipcEP[index].coid); + ipcEP[index].coid = 0; + } + if (ipcEP[index].chid != 0) { + (void)ChannelDestroy_r(ipcEP[index].chid); + ipcEP[index].chid = 0; + } +#endif + err = NvSciIpcCloseEndpointSafe(ipcEP[index].ipcEndpoint, false); + if (NvSciError_Success != err) { + atomic_store(&streamDone, 1); + printf("Failed (%x) to close IPC endpoint for connection index=%d\n", + err, index); + return false; + } + ipcEP[index].ipcEndpoint = 0U; + + /* We need to open the endpoint again if it is not a consumer */ + if (!isConsumer) { + err = NvSciIpcOpenEndpoint(ipcEP[index].ipcChannel, + &ipcEP[index].ipcEndpoint); + if (NvSciError_Success != err) { + atomic_store(&streamDone, 1); + printf("Failed (%x) to open channel (%s) for IpcSrc \ + for connection index=%d\n", + err, ipcEP[index].ipcChannel, index); + return false; + } + err = NvSciIpcResetEndpointSafe(ipcEP[index].ipcEndpoint); + if (NvSciError_Success != err) { + atomic_store(&streamDone, 1); + printf("Failed (%x) to reset IPC endpoint for connection \ + index = %d\n", err, index); + return false; + } + ipcEP[index].ipcConnected = true; + } + return true; + } + } + return false; +} + +/* Dispatch thread function to handle late/re-attach +* consumer connections +*/ +void* handleLateConsumerThreadFunc(void *args) +{ + 
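+    /* Poll the handshake channels until the stream completes, servicing
+     * any late/re-attaching consumer that establishes a connection. */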
NvSciError err; + bool lateConsumerConnectionFound = false; + bool retry = false; + + while(!atomic_load(&streamDone) || retry) { + /* Poll for the status of IPC channels */ + pthread_mutex_lock(&mutex); + for (uint32_t i=0; inumPacket, false)) { + return 0; + } + + /* Create producer */ + NvSciStreamBlock producerBlock; + if (!createProducer(&producerBlock, poolBlock, prodOpts->numFrames)) { + return 0; + } + + /* If multicast required, add the block. */ + if (prodOpts->numConsumer > 1) { + + /* Create multicast block */ + if (!createMulticast(&multicastBlock, prodOpts->numConsumer)) { + return 0; + } + + /* Connect to producer */ + if (NvSciError_Success != + NvSciStreamBlockConnect(producerBlock, multicastBlock)) { + printf("Failed to connect multicast to producer\n"); + return 0; + } + + /* Multicast block is end of chain */ + *producerLink = multicastBlock; + + } else { + + /* Producer block is end of chain */ + *producerLink = producerBlock; + + } + + return 1; +} + +/* Set up chain of consumer side blocks */ +static int32_t setupConsumerChain( + NvSciStreamBlock* consumerLink, + ConsumerOptions* consOpts, + uint32_t index) +{ + /* + * Note: Currently the consumer "chain" just consists of the consumer + * itself and its associated queue. We follow this paradigm to + * allow easy addition of new optional blocks in the future. + */ + + /* Create queue */ + NvSciStreamBlock queueBlock; + if (!createQueue(&queueBlock, consOpts->useMailbox)) { + return 0; + } + + /* Create consumer */ + NvSciStreamBlock consumerBlock; + if (!createConsumer(&consumerBlock, queueBlock, index, consOpts->numFrames)) { + return 0; + } + + /* Consumer block is start of chain */ + *consumerLink = consumerBlock; + + return 1; +} + +/* Add additional branch options */ +static int32_t setupBranchOptions( + NvSciStreamBlock* consumerLink, + ConsumerOptions* consOpts) +{ + /* If limiter requested, add it */ + if (consOpts->useLimiter) { + + /* If a consumer may generate unreliable fences, a ReturnSync block can + * be added as the downstream of the Limiter block for that consumer, + * to isolate any packets with bad fences. 
+ */ + NvSciStreamBlock returnSyncBlock; + if (!createReturnSync(&returnSyncBlock)) { + return 0; + } + + /* Connect to incoming consumer chain */ + if (NvSciError_Success != + NvSciStreamBlockConnect(returnSyncBlock, *consumerLink)) { + printf("Failed to connect returnSyncBlock to consumer chain\n"); + return 0; + } + + + /* ReturnSync is new end of chain */ + *consumerLink = returnSyncBlock; + + /* Create limiter */ + NvSciStreamBlock limiterBlock; + if (!createLimiter(&limiterBlock, consOpts->useLimiter)) { + return 0; + } + + /* Connect to incoming consumer chain */ + if (NvSciError_Success != + NvSciStreamBlockConnect(limiterBlock, *consumerLink)) { + printf("Failed to connect limiter to consumer chain\n"); + return 0; + } + + /* Limiter is new end of chain */ + *consumerLink = limiterBlock; + } + + return 1; +} + +/* Set up IPC from producer to consumer */ +static int32_t setupProdToConsIPC( + NvSciStreamBlock* consumerLink, + ConsumerOptions* consOpts, + bool useExtEventService) +{ + if (!consOpts->c2cMode) { + /* Create IPC block */ + if (!createIpcSrc(consumerLink, + consOpts->srcChannel, + useExtEventService)) { + return 0; + } + } else { + /* Create a queue for C2C src block */ + NvSciStreamBlock queueBlock; + if (!createQueue(&queueBlock, consOpts->c2cSrcUseMailbox)) { + return 0; + } + + /* Create C2C block */ + if (!createC2cSrc(consumerLink, consOpts->srcChannel, queueBlock)) { + return 0; + } + + /* If mailbox is used with C2CSrc, then create presentSync block */ + if (1U == consOpts->c2cSrcUseMailbox) { + NvSciStreamBlock presentSyncBlock; + if (!createPresentSync(&presentSyncBlock)) { + return 0; + } + + if (NvSciError_Success != + NvSciStreamBlockConnect(presentSyncBlock, *consumerLink)) { + printf("Failed to connect PresentSync to consumer chain\n"); + return 0; + } + + /* PresentSync is new end of chain */ + *consumerLink = presentSyncBlock; + } + } + return 1; +} + +/* Set up IPC from consumer to producer */ +static int32_t setupConsToProdIPC( + NvSciStreamBlock* producerLink, + ConsumerOptions* consOpts, + bool useExtEventService) +{ + if (!consOpts->c2cMode) { + /* Create IPC block */ + if (!createIpcDst(producerLink, + consOpts->dstChannel, + useExtEventService)) { + return 0; + } + } else { + /* Create a pool for C2C dst block */ + NvSciStreamBlock poolBlock; + if (!createPool(&poolBlock, consOpts->c2cDstNumPacket, true)) { + return 0; + } + /* Create C2C block */ + if (!createC2cDst(producerLink, consOpts->dstChannel, poolBlock)) { + return 0; + } + } + return 1; +} + +/* + * Main application function. + * As per standards, return of 0 indicates success and anything + * else is failure. 
+ */
+int main(int argc, char *argv[])
+{
+    uint32_t i;
+    int ret = 0;
+
+    /* Initialize parameters */
+    uint32_t badParam = 0U;
+    uint32_t multiProcess = 0U;
+    uint32_t multiSOC = 0U;
+    uint32_t eventOption = 0U;
+
+    ProducerOptions prodOpts = {.resident=0U, .numConsumer=1U,
+                                .numPacket=3U, .numFrames=32, .usecase=1};
+    memset(consOpts, 0, sizeof(consOpts));
+    memset(&opts, 0, sizeof(CommonOptions));
+    memset(ipcEP, 0, sizeof(ipcEP));
+
+    /* Parse command line */
+    int32_t opt;
+    while ((opt = getopt(argc, argv, "m:n:r:f:l:q:k:e:ELs:u:ipc:P:C:F:Q:")) != EOF) {
+        switch (opt) {
+        case 'm': /* set number of consumers */
+            prodOpts.numConsumer = atoi(optarg);
+            opts.numConsumer = prodOpts.numConsumer;
+            if ((prodOpts.numConsumer < 1U) ||
+                (prodOpts.numConsumer > MAX_CONSUMERS)) {
+                badParam = 1U;
+            }
+            for (i = 0; i < MAX_CONSUMERS; i++) {
+                sprintf(ipcEP[i].ipcChannel, "%s%d", ipcBaseName, 2*i+0);
+                sprintf(ipcEP[i].ipcChannelForHandshake,
+                        "%s%d", ipcBaseName, 2*i+8);
+            }
+            break;
+        case 'r': /* set number of late consumers */
+            opts.numLateConsumer = atoi(optarg);
+            opts.lateAttach = true;
+            if (opts.numLateConsumer > MAX_CONSUMERS) {
+                badParam = 1U;
+            }
+            /* there must be at least one early consumer */
+            if ((prodOpts.numConsumer - opts.numLateConsumer) < 1U) {
+                badParam = 1U;
+            }
+            prodOpts.numFrames = 100000;
+            break;
+        case 'f': /* set number of packets */
+            prodOpts.numPacket = atoi(optarg);
+            if ((prodOpts.numPacket < 1U) ||
+                (prodOpts.numPacket > MAX_PACKETS)) {
+                badParam = 1U;
+            }
+            break;
+
+        case 'k': /* use specified number of frames for indexed consumer */
+            i = atoi(optarg);
+            if (i >= MAX_CONSUMERS) {
+                badParam = 1U;
+            } else {
+                consOpts[i].numFrames = atoi(argv[optind++]);
+            }
+            break;
+
+        case 'n': /* use specified number of frames for producer */
+            prodOpts.numFrames = atoi(optarg);
+            break;
+
+        case 'l': /* use limiter block for indexed consumer */
+            i = atoi(optarg);
+            if (i >= MAX_CONSUMERS) {
+                badParam = 1U;
+            } else {
+                consOpts[i].useLimiter = atoi(argv[optind++]);
+            }
+            break;
+        case 'q': /* use specified queue for indexed consumer */
+            i = atoi(optarg);
+            if (i >= MAX_CONSUMERS) {
+                badParam = 1U;
+            } else {
+                char t = argv[optind++][0];
+                if (t == 'm') {
+                    consOpts[i].useMailbox = 1U;
+                } else if (t == 'f') {
+                    consOpts[i].useMailbox = 0U;
+                } else {
+                    badParam = 1U;
+                }
+            }
+            break;
+        case 'e': /* set event handling mechanism */
+            if (optarg[0] == 's') {
+                eventOption = 0U;
+            } else if (optarg[0] == 't') {
+                eventOption = 1U;
+            } else {
+                badParam = 1U;
+            }
+            break;
+        case 'E': /* set user-provided event service to handle IPC events */
+            opts.useExtEventService = true;
+            break;
+
+        case 'L': /* indicate late/re-attach of a consumer connection */
+            opts.lateAttach = true;
+            break;
+
+        case 's': /* set Image Color Format type */
+            if (optarg[0] == 'r') {
+                opts.yuv = false;
+            } else if (optarg[0] == 'y') {
+                opts.yuv = true;
+            } else {
+                badParam = 1U;
+            }
+            break;
+        case 'u': /* set use case */
+            i = atoi(optarg);
+            prodOpts.usecase = i;
+            if (i == 1) {
+                createProducer = createProducer_Usecase1;
+                createConsumer = createConsumer_Usecase1;
+                createPool = createPool_Common;
+            }
+#if (NV_SUPPORT_NVMEDIA == 1)
+            else if (i == 2) {
+                createProducer = createProducer_Usecase2;
+                createConsumer = createConsumer_Usecase2;
+                createPool = createPool_Common;
+            }
+#endif
+#if (NV_SUPPORT_ASILD == 1)
+            else if (i == 3) {
+                createProducer = createProducer_Usecase3;
+                createConsumer = createConsumer_Usecase3;
+                createPool = createPool_Usecase3;
+            }
+#endif
+            else {
+ badParam = 1U; + } + break; + case 'i': + opts.endInfo = true; + break; + + /* For inter - process operation */ + + case 'p': /* set producer resident */ + prodOpts.resident = 1U; + multiProcess = 1U; + break; + case 'c': /* set consumer resident */ + i = atoi(optarg); + if (i >= MAX_CONSUMERS) { + badParam = 1U; + } else { + consOpts[i].resident = 1U; + multiProcess = 1U; + } + break; + + /* For inter - chip (C2C) operation */ + + case 'P': /* set ipc endpoint for C2C */ + i = atoi(optarg); + if (i >= MAX_CONSUMERS) { + badParam = 1U; + } else { + /* Ipc channel used to communicate with this C2C consumer */ + strcpy(consOpts[i].srcChannel, argv[optind++]); + strcpy(ipcEP[i].c2cChannel, consOpts[i].srcChannel); + consOpts[i].c2cMode = 1U; + prodOpts.resident = 1U; + multiProcess = 1U; + multiSOC = 1U; + opts.c2cMode = true; + } + break; + case 'C': /* set C2C mode */ + i = atoi(optarg); + if (i >= MAX_CONSUMERS) { + badParam = 1U; + } else { + /* Ipc channel name used by this C2C consumer */ + strcpy(consOpts[i].dstChannel, argv[optind++]); + consOpts[i].c2cMode = 1U; + multiProcess = 1U; + multiSOC = 1U; + if (consOpts[i].c2cDstNumPacket == 0U) { + /* default packet size 3 if not set already */ + consOpts[i].c2cDstNumPacket = 3U; + } + } + break; + case 'F': /* set number of packets for C2C Dst of indexed consumer */ + i = atoi(optarg); + if (i >= MAX_CONSUMERS) { + badParam = 1U; + } else { + consOpts[i].c2cDstNumPacket = atoi(argv[optind++]); + if ((consOpts[i].c2cDstNumPacket < 1U) || + (consOpts[i].c2cDstNumPacket > MAX_PACKETS)) { + badParam = 1U; + } + } + break; + case 'Q': /* use specified queue for C2C Src of indexed consumer */ + i = atoi(optarg); + if (i >= MAX_CONSUMERS) { + badParam = 1U; + } else { + char t = argv[optind++][0]; + if (t == 'm') { + consOpts[i].c2cSrcUseMailbox = 1U; + } else if (t == 'f') { + consOpts[i].c2cSrcUseMailbox = 0U; + } else { + badParam = 1U; + } + } + break; + default: + badParam = 1U; + break; + } + } + + /* Handle parsing failure */ + if (badParam) { + print_usage(argv[0]); + return 1; + } + + /* Initialize streamDone */ + atomic_init(&streamDone, 0); + + if (opts.useExtEventService && (opts.numLateConsumer > 0U)) { + /* Using external event service for late/re-attach usecase + * is not supported for now. + */ + return 1; + } + + if (opts.useExtEventService && (eventOption == 1U)) { + /* Using external event service for internal ipc I/O messages + * not supported with threading model in this sample app */ + return 1; + } + + + if ((prodOpts.usecase > 1U) && (opts.numLateConsumer > 0U)) { + /* late/re-attach usecase is not supported except for usecase1 + in this sample app. */ + return 1; + } + + /* Check validity of the combination C2C & non-C2C consumers */ + for (i=0U; iinit() != 0) { + return 1; + } + + /* + * Initialize NvSci libraries + */ + if (NvSciError_Success != NvSciSyncModuleOpen(&sciSyncModule)) { + printf("Unable to open NvSciSync module\n"); + } + if (NvSciError_Success != NvSciBufModuleOpen(&sciBufModule)) { + printf("Unable to open NvSciBuf module\n"); + } + if (NvSciError_Success != NvSciIpcInit()) { + printf("Unable to initialize NvSciIpc\n"); + } + + /* + * If producer is resident, create producer block chain and attach + * all consumers. 
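+     * (Late/re-attached consumers are connected afterwards by the
+     * dispatch thread once the initial stream is running.)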
+ */ + if (prodOpts.resident) { + + /* Set up producer chain (up through any multicast block) */ + NvSciStreamBlock producerLink; + if (!setupProducerChain(&producerLink, &prodOpts)) { + return 1; + } + + /* + * For each consumer, either set up the consumer chain or create + * the IPC block to communicate with it, depending on whether the + * consumer is resident. + */ + for (i=0U; i<(prodOpts.numConsumer - opts.numLateConsumer); ++i) { + + /* Create consumer or IPC to consumer */ + NvSciStreamBlock consumerLink; + if (consOpts[i].resident) { + if (!setupConsumerChain(&consumerLink, &consOpts[i], i)) { + return 1; + } + } else { + if (!setupProdToConsIPC(&consumerLink, + &consOpts[i], + opts.useExtEventService)) { + return 1; + } + } + /* When Late-/reattach usecase is selected, a returnSync block + * is connected to the consumer chain to ensure proper fence waits + * during consumer disconnect and reconnect. + */ + if (opts.numLateConsumer > 0U) { + NvSciStreamBlock returnSyncBlock; + if (!createReturnSync(&returnSyncBlock)) { + return 1; + } + + /* Connect to incoming consumer chain */ + if (NvSciError_Success != + NvSciStreamBlockConnect(returnSyncBlock, consumerLink)) { + printf("Failed to connect returnSyncBlock to consumer chain\n"); + return 1; + } + consumerLink = returnSyncBlock; + } + + /* Add any other options (e.g. limiter) for this branch */ + if (!setupBranchOptions(&consumerLink, &consOpts[i])) { + return 1; + } + + /* Attach to producer chain */ + if (NvSciError_Success != + NvSciStreamBlockConnect(producerLink, consumerLink)) { + printf("Failed to connect consumer %d to producer\n", i); + return 1; + } + + } + if (opts.numLateConsumer > 0U) { + NvSciError err; + err = NvSciStreamBlockSetupStatusSet(multicastBlock, + NvSciStreamSetup_Connect, + true); + if (err != NvSciError_Success) { + printf("Attaching a late consumer connection failed=%x\n", err); + return 1; + } + } + } + + /* + * Otherwise, create any consumer chains resident in this process, + * and connect with IPC back to the producer process. 
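+     * (Each resident consumer chain is built with setupConsumerChain() and
+     * linked back to the producer process through setupConsToProdIPC().)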
+     */
+    else {
+
+        for (i = 0U; i < MAX_CONSUMERS; ++i) {
+            if (consOpts[i].resident) {
+
+                /* Set up consumer chain */
+                NvSciStreamBlock consumerLink;
+                if (!setupConsumerChain(&consumerLink, &consOpts[i], i)) {
+                    return 1;
+                }
+
+                /* Set up IPC back to the producer process */
+                NvSciStreamBlock producerLink;
+                if (!setupConsToProdIPC(&producerLink,
+                                        &consOpts[i],
+                                        opts.useExtEventService)) {
+                    return 1;
+                }
+
+                /* Connect the consumer chain to the producer process */
+                if (NvSciError_Success !=
+                    NvSciStreamBlockConnect(producerLink, consumerLink)) {
+                    printf("Failed to connect consumer %d to producer\n", i);
+                    return 1;
+                }
+            }
+        }
+    }
+
+    /* Spawn the dispatch thread to handle late/re-attach consumers */
+    if (opts.numLateConsumer > 0U) {
+        if (0 != pthread_create(&dispatchThread, NULL,
+                                handleLateConsumerThreadFunc, NULL)) {
+            printf("Failed to spawn dispatch thread\n");
+            return 1;
+        }
+    }
+
+    /* Run the event loop */
+    if (!eventFuncs->loop()) {
+        ret = 1;
+    }
+
+    /* Wake up the dispatch thread to terminate upon
+     * stream disconnect
+     */
+    atomic_store(&streamDone, 1);
+    pthread_cond_signal(&cond);
+
+    /* Wait for dispatch thread to terminate */
+    if (opts.numLateConsumer > 0U) {
+        (void)pthread_join(dispatchThread, NULL);
+    }
+
+    if (sciBufModule != NULL) {
+        NvSciBufModuleClose(sciBufModule);
+        sciBufModule = NULL;
+    }
+
+    if (sciSyncModule != NULL) {
+        NvSciSyncModuleClose(sciSyncModule);
+        sciSyncModule = NULL;
+    }
+
+    /* Close the NvSciIpc endpoints */
+    for (uint32_t i = 0U; i < MAX_CONSUMERS; i++) {
+#if (QNX == 1)
+        if (ipcEP[i].coid != 0) {
+            (void)ConnectDetach_r(ipcEP[i].coid);
+            ipcEP[i].coid = 0;
+        }
+        if (ipcEP[i].chid != 0) {
+            (void)ChannelDestroy_r(ipcEP[i].chid);
+            ipcEP[i].chid = 0;
+        }
+#endif
+        if (ipcEP[i].ipcEndpoint) {
+            if (NvSciError_Success !=
+                NvSciIpcCloseEndpointSafe(ipcEP[i].ipcEndpoint, false)) {
+                printf("Failed to close ipc endpoint\n");
+            }
+            ipcEP[i].ipcEndpoint = 0U;
+        }
+        if (ipcEP[i].c2cEndpoint) {
+            if (NvSciError_Success !=
+                NvSciIpcCloseEndpointSafe(ipcEP[i].c2cEndpoint, false)) {
+                printf("Failed to close c2c endpoint\n");
+            }
+            ipcEP[i].c2cEndpoint = 0U;
+        }
+    }
+
+    /* Free the thread synchronization resources */
+    pthread_mutex_destroy(&mutex);
+    pthread_cond_destroy(&cond);
+
+    NvSciIpcDeinit();
+
+    return ret;
+}
diff --git a/event_sample_app/usecase1.h b/event_sample_app/usecase1.h
new file mode 100644
index 0000000..41a1be0
--- /dev/null
+++ b/event_sample_app/usecase1.h
@@ -0,0 +1,55 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+ *
+ * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+ * property and proprietary rights in and to this material, related
+ * documentation and any modifications thereto. Any use, reproduction,
+ * disclosure or distribution of this material and related documentation
+ * without an express license agreement from NVIDIA CORPORATION or
+ * its affiliates is strictly prohibited.
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - usecase #1
+ *
+ * This use case consists of CUDA producer and CUDA consumer(s).
+ * It makes use of the CUDA runtime toolkit.
+ *
+ * There are two packet elements, a large data buffer and a small
+ * buffer containing a CRC checksum value.
+ *
+ * The producer operation is very simple. It takes a local source buffer
+ * filled with simple data, and issues a CUDA command to asynchronously
+ * copy it to the packet's data buffer. It generates a checksum from the
+ * source buffer and puts that in the packet's CRC buffer.
+ *
+ * The consumer(s) similarly issues a CUDA command to copy the packet's
+ * data buffer to a local buffer. When finished, it generates a checksum
+ * from the local copy and compares it to the value in the packet's
+ * CRC buffer.
+ *
+ * The data buffer is processed through the CUDA engine, with commands issued
+ * asynchronously. Sync objects must be used to coordinate when it is safe
+ * to write and read the buffer. The CRC buffer is written and read directly
+ * through the CPU. It uses immediate mode and it is not necessary to wait
+ * for the sync objects before accessing it.
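+ *
+ * (For example, the producer runs generateCRC() over its source buffer
+ * before issuing the copy, and the consumer recomputes the checksum from
+ * its local copy after the CUDA copy completes and compares the two.)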
+ * + * In addition to the normal case where producers signal sync objects that + * consumers wait for, and vice versa, this use case also needs the + * producer to be able to wait for the fences it generates, in order + * to protect its local buffer from modification while still in use. + * So this use case also provides an example of CPU waiting for fences. + */ + +#ifndef _USECASE1_H +#define _USECASE1_H 1 + +/* Names for the packet elements */ +#define ELEMENT_NAME_DATA 0xdada +#define ELEMENT_NAME_CRC 0xcc + +/* Names for the endpoint info */ +#define ENDINFO_NAME_PROC 0xabcd + +#endif // _USECASE1_H diff --git a/event_sample_app/util.c b/event_sample_app/util.c new file mode 100644 index 0000000..39cc268 --- /dev/null +++ b/event_sample_app/util.c @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. + */ + +/* + * NvSciStream Event Loop Driven Sample App - utility functions + */ + +#include "util.h" + +#define CRC32_POLYNOMIAL 0xEDB88320L + +static uint32_t calculateBufferCRC( + uint32_t count, + uint32_t crc, + uint8_t* buffer) +{ + static uint32_t crcTable[256]; + static int initialized = 0; + uint32_t i, j, tmp; + + if (!initialized) { + for (i = 0; i <= 255; i++) { + tmp = i; + for (j = 8; j > 0; j--) { + if (tmp & 1) { + tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL; + } else { + tmp >>= 1; + } + } + crcTable[i] = tmp; + } + initialized = 1; + } + + while (count-- != 0) { + tmp = (crc >> 8) & 0x00FFFFFFL; + crc = tmp ^ crcTable[((uint32_t) crc ^ *buffer++) & 0xFF]; + } + + return crc; +} + +uint32_t generateCRC( + uint8_t *data_ptr, + uint32_t height, + uint32_t width, + uint32_t pitch) +{ + uint32_t y = 0U; + uint32_t crc = 0U; + for (y = 0U; y < height; y++) { + crc = calculateBufferCRC(width, crc, data_ptr); + data_ptr += pitch; + } + return crc; +} diff --git a/event_sample_app/util.h b/event_sample_app/util.h new file mode 100644 index 0000000..11243a8 --- /dev/null +++ b/event_sample_app/util.h @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: LicenseRef-NvidiaProprietary + * + * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual + * property and proprietary rights in and to this material, related + * documentation and any modifications thereto. Any use, reproduction, + * disclosure or distribution of this material and related documentation + * without an express license agreement from NVIDIA CORPORATION or + * its affiliates is strictly prohibited. 
+ */
+
+/*
+ * NvSciStream Event Loop Driven Sample App - utilities
+ */
+
+#ifndef _UTIL_H
+#define _UTIL_H 1
+
+#include <stdint.h>
+
+#if defined(__x86_64__)
+#define cuDeviceGetUuid cuDeviceGetUuid_v2
+#endif
+
+/* CRC checksum generator */
+extern uint32_t generateCRC(
+    uint8_t *data_ptr,
+    uint32_t height,
+    uint32_t width,
+    uint32_t pitch);
+
+#endif // _UTIL_H
diff --git a/push_info.txt b/push_info.txt
new file mode 100644
index 0000000..dbe8a31
--- /dev/null
+++ b/push_info.txt
@@ -0,0 +1 @@
+jetson_38.2.1
diff --git a/rawstream/Makefile b/rawstream/Makefile
new file mode 100644
index 0000000..050bed0
--- /dev/null
+++ b/rawstream/Makefile
@@ -0,0 +1,126 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+#
+
+# Location of common libraries
+LIB_DIR = /usr/lib/aarch64-linux-gnu
+# NOTE: This directory PATH will be moved from "tegra" to "nvidia".
+TEGRA_LIB_DIR ?= /usr/lib/aarch64-linux-gnu/tegra
+
+# Location of the CUDA Toolkit
+CUDA_PATH ?= /usr/local/cuda
+
+# Location of NVSCI headers
+NVSCI_HEADER_DIR ?= /usr/include/nvsci_headers
+NVSCI_LIB_DIR = $(TEGRA_LIB_DIR)
+
+GCC ?= g++
+NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(GCC)
+
+# Internal flags
+NVCCFLAGS :=
+CCFLAGS :=
+LDFLAGS :=
+
+# Extra user flags
+EXTRA_NVCCFLAGS ?=
+EXTRA_LDFLAGS ?=
+EXTRA_CCFLAGS ?=
+
+override abi := aarch64
+LDFLAGS += --dynamic-linker=/lib/ld-linux-aarch64.so.1
+
+# Debug build flags
+dbg = 0
+ifeq ($(dbg),1)
+      NVCCFLAGS += -g -G
+      TARGET := debug
+else
+      TARGET := release
+endif
+
+ALL_CCFLAGS :=
+ALL_CCFLAGS += $(NVCCFLAGS)
+ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
+
+ALL_LDFLAGS :=
+ALL_LDFLAGS += $(ALL_CCFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
+
+################################################################################
+
+# Common includes and paths
+INCLUDES := -I./
+LIBRARIES := -L$(LIB_DIR)
+
+CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
+CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
+CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
+
+CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
+ifeq ("$(CUDALIB)","")
+  $(error ERROR - libcuda.so not found, CUDA Driver is not installed or CUDA_PATH is not correctly set.)
+else + CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" ) + LIBRARIES += -L$(CUDALIB) -lcuda -lrt +endif + +# Includes and paths for NVSCI libraries +NVSCIBUFHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscibuf.h -print 2>/dev/null) +NVSCISYNCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvscisync.h -print 2>/dev/null) +NVSCIIPCHEADER := $(shell find -L $(NVSCI_HEADER_DIR) -name nvsciipc.h -print 2>/dev/null) + +ifeq ("$(NVSCIBUFHEADER)","") + $(error ERROR - nvscibuf.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCISYNCHEADER)","") + $(error ERROR - nvscisync.h not found in $(NVSCI_HEADER_DIR)) +endif +ifeq ("$(NVSCIIPCHEADER)","") + $(error ERROR - nvsciipc.h not found in $(NVSCI_HEADER_DIR)) +endif + +INCLUDES += -I$(NVSCI_HEADER_DIR) +LIBRARIES += -L$(NVSCI_LIB_DIR) -lnvscibuf -lnvscisync -lnvscievent -lnvsciipc -lnvscistream + +ALL_CCFLAGS += --std=c++11 --threads 0 + +################################################################################ +# Target rules +OUTPUT := rawstream + +all: build + +build: $(OUTPUT) + +OBJ := rawstream_consumer.o +OBJ += rawstream_cuda.o +OBJ += rawstream_ipc_linux.o +OBJ += rawstream_main.o +OBJ += rawstream_producer.o + +%.o: %.c + $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) -o $@ -c $< + +$(OUTPUT): $(OBJ) + $(NVCC) $(ALL_LDFLAGS) -o $@ $+ $(LIBRARIES) + +run: build + $(OUTPUT) + +testrun: build + +clean: + rm -f $(OBJ) $(OUTPUT) + +clobber: clean diff --git a/rawstream/README.txt b/rawstream/README.txt new file mode 100644 index 0000000..b0b31b9 --- /dev/null +++ b/rawstream/README.txt @@ -0,0 +1,41 @@ +Rawstream Sample App - README + +Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + +NVIDIA Corporation and its licensors retain all intellectual property and +proprietary rights in and to this software, related documentation and any +modifications thereto. Any use, reproduction, disclosure or distribution +of this software and related documentation without an express license +agreement from NVIDIA Corporation is strictly prohibited. + +--- +# rawstream - NvStreams Rawstream Sample App + +## Description + +This directory contains a raw stream sample application using NvSciBuf, +NvSciSync and NvSciIpc. + + +## Build the application + +The rawstream sample includes source code and a Makefile. +1. On the host system, navigate to the sample application directory: + + $ cd /samples/nvsci/rawstream/ + +2. Build the sample application: + + $ make clean + $ make + + +## Examples of how to run the sample application: + + $ sudo ./rawstream -p & + $ sudo ./rawstream -c + +## Examples of how to run the sample application for late attach: + + $ sudo ./rawstream -p -l & + $ sudo ./rawstream -c -l diff --git a/rawstream/rawstream.h b/rawstream/rawstream.h new file mode 100644 index 0000000..e55554a --- /dev/null +++ b/rawstream/rawstream.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020-2025 NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. 
+ */
+
+#ifndef _rawstream_h
+#define _rawstream_h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sys/select.h>
+#include <nvscibuf.h>
+#include <nvscisync.h>
+#include <nvsciipc.h>
+#include "cuda.h"
+#include "cuda_runtime_api.h"
+
+// Constants controlling configuration
+#define totalFrames  32U
+#define totalBuffers  4U
+
+#if defined(__x86_64__)
+#define cuDeviceGetUuid cuDeviceGetUuid_v2
+#endif
+
+// Sync/Buf modules
+extern NvSciSyncModule syncModule;
+extern NvSciBufModule  bufModule;
+
+// Exchanged sync info
+extern NvSciSyncAttrList producerSignalAttrs;
+extern NvSciSyncAttrList consumerSignalAttrs;
+extern NvSciSyncAttrList producerWaitAttrs;
+extern NvSciSyncAttrList consumerWaitAttrs;
+extern NvSciSyncAttrList prodToConsAttrs;
+extern NvSciSyncAttrList consToProdAttrs;
+extern NvSciSyncObj consumerSignalObj;
+extern NvSciSyncObj producerSignalObj;
+extern NvSciSyncObj consumerWaitObj;
+extern NvSciSyncObj producerWaitObj;
+
+// Exchanged buf info
+extern NvSciBufAttrList producerWriteAttrs;
+extern NvSciBufAttrList consumerReadAttrs;
+extern NvSciBufAttrList combinedBufAttrs;
+
+// CUDA info common to producer and consumer
+typedef struct {
+    int deviceId;
+    CUuuid uuid;
+    cudaStream_t stream;
+    cudaExternalSemaphore_t signalerSem;
+    cudaExternalSemaphore_t waiterSem;
+    NvSciBufType bufType;
+    uint64_t bufSize;
+    uint8_t* bufCopy;
+} CudaClientInfo;
+
+// List of buffers with status
+typedef struct {
+    // Buffer handle
+    NvSciBufObj obj;
+    // CUDA external memory object
+    cudaExternalMemory_t extMem;
+    // Mapping into virtual memory
+    uint8_t* ptr;
+    // Current owner (0 = producer, 1 = consumer)
+    uint32_t owner;
+    // Fence to wait for
+    NvSciSyncFence fence;
+    // Checksum for error checking
+    uint32_t crc;
+} Buffer;
+extern Buffer buffers[totalBuffers];
+
+// Packet data
+// Note: The checksum is not, in general, needed in a real streaming
+//       application. All that is required is something to identify
+//       the buffer and provide the fences. See comments in the producer
+//       and consumer for the reason for the checksum.
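+// (bufferId identifies which Buffer entry changed hands; fenceDesc is the
+// IPC-exportable form of the NvSciSync fence guarding that buffer.)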
+typedef struct { + // buffer identifier + uint32_t bufferId; + // buffer checksum + uint32_t crc; + // Fence to wait for + NvSciSyncFenceIpcExportDescriptor fenceDesc; +} Packet; + +// IPC related info +typedef struct { + // NvSciIpc handle + NvSciIpcEndpoint endpoint; + // IPC channel info + struct NvSciIpcEndpointInfo info; + + // QNX: Channel id to get event + int32_t chId; + // QNX: Connection id to send event in library + int32_t connId; + // Linux: IPC event fd + int32_t ipcEventFd; +} IpcWrapper; +extern IpcWrapper ipcWrapper; + +// CUDA data types +typedef struct cudaExternalSemaphoreHandleDesc cudaExternalSemaphoreHandleDesc; +typedef struct cudaExternalMemoryHandleDesc cudaExternalMemoryHandleDesc; +typedef struct cudaExternalMemoryBufferDesc cudaExternalMemoryBufferDesc; +typedef struct cudaExternalSemaphoreWaitParams cudaExternalSemaphoreWaitParams; +typedef struct cudaExternalSemaphoreSignalParams cudaExternalSemaphoreSignalParams; + +// Utility functions +extern uint32_t GenerateCRC(uint8_t* data, uint32_t width, uint32_t height, uint32_t pitch); + +// Thread functions +extern void* producerFunc(void*); +extern void* consumerFunc(void*); + +// IPC functions +extern NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper); +extern NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size); +extern NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size); +extern void ipcDeinit(IpcWrapper* ipcWrapper); + +// CUDA-specific operations +extern bool setupCuda(CudaClientInfo* info); +extern bool setupCudaSync(CudaClientInfo* info, + NvSciSyncObj sciSignalObj, + NvSciSyncObj sciWaitObj); +extern bool setupCudaBufAttr(CudaClientInfo* info, + NvSciBufAttrList attrs); +extern bool setupCudaBuffer(CudaClientInfo* info, + Buffer* buf); +extern bool waitCudaFence(CudaClientInfo* info, + Buffer* buf); +extern bool signalCudaFence(CudaClientInfo* info, + Buffer* buf); +extern void deinitCuda(CudaClientInfo* info); +extern void deinitCudaBuffer(Buffer* buf, int num); + +#endif // _rawstream_h diff --git a/rawstream/rawstream_consumer.c b/rawstream/rawstream_consumer.c new file mode 100644 index 0000000..e77a40d --- /dev/null +++ b/rawstream/rawstream_consumer.c @@ -0,0 +1,670 @@ +/* + * Copyright (c) 2020-2024 NVIDIA Corporation. All Rights Reserved. + * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "rawstream.h" + +void* consumerFunc(void* arg) +{ + CudaClientInfo cudaInfo; + NvSciError sciErr; + int cudaErr; + void* recvWaitListDesc = NULL; + void* recvObjAndListDesc = NULL; + void* recvBufListDesc = NULL; + + *(int*)arg = 1; + fprintf(stderr, "Consumer starting\n"); + + // Do common cuda initialization + if (!setupCuda(&cudaInfo)) { + goto done; + } + + // Create an empty sync attribute list for signaling permissions. 
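+    // (This starts the sync attribute exchange: each side lets CUDA fill
+    // its signal/wait attribute lists, swaps unreconciled wait attrs over
+    // IPC, reconciles, then allocates and exports its signaling object.)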
+ sciErr = NvSciSyncAttrListCreate(syncModule, &consumerSignalAttrs); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to create consumer signal attrs (%x)\n", + sciErr); + goto done; + } + + // Query CUDA for attributes needed to signal syncs + cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerSignalAttrs, + cudaInfo.deviceId, + cudaNvSciSyncAttrSignal); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Could not query signal attributes from CUDA (%d)\n", + cudaErr); + goto done; + } + + fprintf(stderr, "Consumer signal attributes established\n"); + + // Create an empty sync attribute list for waiting permissions. + sciErr = NvSciSyncAttrListCreate(syncModule, &consumerWaitAttrs); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to create consumer wait attrs (%x)\n", + sciErr); + goto done; + } + + // Query CUDA for attributes needed to wait for syncs + cudaErr = cudaDeviceGetNvSciSyncAttributes(consumerWaitAttrs, + cudaInfo.deviceId, + cudaNvSciSyncAttrWait); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Could not query wait attributes from CUDA (%d)\n", + cudaErr); + goto done; + } + + fprintf(stderr, "Consumer wait attributes established\n"); + + // Export consumer's wait attributes to a form suitable for IPC + size_t sendWaitAttrListSize = 0U; + void* sendWaitListDesc = NULL; + sciErr = NvSciSyncAttrListIpcExportUnreconciled(&consumerWaitAttrs, + 1, + ipcWrapper.endpoint, + &sendWaitListDesc, + &sendWaitAttrListSize); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to export consumer wait attrs (%x)\n", + sciErr); + goto done; + } + + // Send the size of the consumer's wait attributes to the producer, + // so it knows how much data to expect + sciErr = ipcSend(&ipcWrapper, + &sendWaitAttrListSize, + sizeof(sendWaitAttrListSize)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to send consumer wait attrs size (%x)\n", + sciErr); + goto done; + } + + // Send the exported form of the consumer's wait attributes + sciErr = ipcSend(&ipcWrapper, + sendWaitListDesc, + sendWaitAttrListSize); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Unable to send consumer wait attrs (%x)\n", sciErr); + goto done; + } + + // Wait to receive the size of the producer's wait attributes + size_t recvWaitAttrListSize = 0U; + sciErr = ipcRecvFill(&ipcWrapper, + &recvWaitAttrListSize, + sizeof(recvWaitAttrListSize)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to recv producer wait attr size (%x)\n", + sciErr); + goto done; + } + + // Allocate a buffer big enough to receive the producer's wait attributes + recvWaitListDesc = malloc(recvWaitAttrListSize); + if (recvWaitListDesc == NULL) { + sciErr = NvSciError_InsufficientMemory; + fprintf(stderr, + "Sync attr allocation failed (%x)\n", + sciErr); + goto done; + } + + // Wait to receive producer's wait attributes + sciErr = ipcRecvFill(&ipcWrapper, + recvWaitListDesc, + recvWaitAttrListSize); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to recv producer wait attrs (%x)\n", + sciErr); + goto done; + } + + // Convert the received producer wait attributes to an attribute list + sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule, + ipcWrapper.endpoint, + recvWaitListDesc, + recvWaitAttrListSize, + &producerWaitAttrs); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to import producer wait attrs (%x)\n", + sciErr); + goto done; + } + + // Get combined attributes for consumer to producer signaling + 
NvSciSyncAttrList syncAllAttrs[2], syncConflictAttrs;
+    syncAllAttrs[0] = consumerSignalAttrs;
+    syncAllAttrs[1] = producerWaitAttrs;
+    sciErr = NvSciSyncAttrListReconcile(syncAllAttrs, 2,
+                                        &consToProdAttrs, &syncConflictAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't merge consumer->producer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate consumer to producer sync object
+    sciErr = NvSciSyncObjAlloc(consToProdAttrs, &consumerSignalObj);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't allocate consumer->producer sync (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Export sync attributes and object to a form suitable for IPC
+    void* sendObjAndListDesc = NULL;
+    size_t sendObjAndListSize = 0U;
+    sciErr = NvSciSyncIpcExportAttrListAndObj(consumerSignalObj,
+                                              NvSciSyncAccessPerm_WaitOnly,
+                                              ipcWrapper.endpoint,
+                                              &sendObjAndListDesc,
+                                              &sendObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't export consumer->producer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the size of the sync description to the producer,
+    // so it knows how much data to expect
+    sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't send consumer->producer sync description size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the sync description to the producer
+    sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't send consumer->producer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Wait to receive the size of the producer->consumer sync description
+    size_t recvObjAndListSize = 0U;
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         &recvObjAndListSize,
+                         sizeof(size_t));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't recv producer->consumer sync description size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate a buffer big enough to receive the description
+    recvObjAndListDesc = malloc(recvObjAndListSize);
+    if (NULL == recvObjAndListDesc) {
+        sciErr = NvSciError_InsufficientMemory;
+        fprintf(stderr, "Sync description allocation failed (%x)\n", sciErr);
+        goto done;
+    }
+
+    // Wait to receive producer->consumer sync description
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         recvObjAndListDesc,
+                         recvObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't receive producer->consumer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Convert the received producer->consumer sync description to a
+    // sync attribute list and object
+    sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
+                                              ipcWrapper.endpoint,
+                                              recvObjAndListDesc,
+                                              recvObjAndListSize,
+                                              &consumerWaitAttrs,
+                                              1,
+                                              NvSciSyncAccessPerm_WaitOnly,
+                                              ipcWrapper.endpoint,
+                                              &consumerWaitObj);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't import producer->consumer sync (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Validate imported reconciled attribute list and object
+    {
+        NvSciSyncAttrList consumerWaitList;
+
+        sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
+                     consToProdAttrs,
+                     NULL,
+                     0,
+                     NvSciSyncAccessPerm_SignalOnly);
+        if (NvSciError_Success != sciErr) {
+            fprintf(
+                stderr,
+                "Validation of consToProd list failed: %x\n", sciErr);
+            goto done;
+        }
+
+        sciErr = NvSciSyncObjGetAttrList(consumerWaitObj,
+                                         &consumerWaitList);
+        if (NvSciError_Success != sciErr) {
+            fprintf(stderr,
+                    "Can't get the reconciled list from consumer wait object (%x)\n",
+                    sciErr);
+            goto done;
+        }
+
+        sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs(
+                     consumerWaitList,
+                     NULL,
+                     0,
+                     NvSciSyncAccessPerm_WaitOnly);
+        if (NvSciError_Success != sciErr) {
+            fprintf(
+                stderr,
+                "Validation of imported reconciled consumer wait list failed: %x\n",
+                sciErr);
+            goto done;
+        }
+
+        sciErr = NvSciSyncObjValidate(consumerWaitObj);
+        if (NvSciError_Success != sciErr) {
+            fprintf(
+                stderr,
+                "Validation of imported consumer wait object failed: %x\n",
+                sciErr);
+            goto done;
+        }
+    }
+
+    // Set up CUDA sync objects, importing NvSciSync objects
+    if (!setupCudaSync(&cudaInfo, consumerSignalObj, consumerWaitObj)) {
+        goto done;
+    }
+
+    fprintf(stderr, "Consumer exchanged sync objects with producer\n");
+
+    // Create an empty buffer attribute list for consumer buffers
+    sciErr = NvSciBufAttrListCreate(bufModule, &consumerReadAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to create consumer buffer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Fill consumer buffer attribute list with values
+    NvSciBufAttrKeyValuePair bufKeyValue[4];
+    NvSciRmGpuId gpuId;
+    memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
+    bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
+    bufKeyValue[0].value = &gpuId;
+    bufKeyValue[0].len = sizeof(gpuId);
+    NvSciBufType bufType = NvSciBufType_RawBuffer;
+    bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
+    bufKeyValue[1].value = &bufType;
+    bufKeyValue[1].len = sizeof(bufType);
+    NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_Readonly;
+    bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
+    bufKeyValue[2].value = &bufPerm;
+    bufKeyValue[2].len = sizeof(bufPerm);
+    bool bufAccessFlag = true;
+    bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
+    bufKeyValue[3].value = &bufAccessFlag;
+    bufKeyValue[3].len = sizeof(bufAccessFlag);
+
+    sciErr = NvSciBufAttrListSetAttrs(consumerReadAttrs, bufKeyValue, 4);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Unable to fill consumer buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
+
+    fprintf(stderr, "Consumer buffer attributes established\n");
+
+    // Export consumer buffer attributes in a form suitable for IPC
+    size_t consumerReadAttrsSize = 0U;
+    void* consumerReadAttrsDesc = NULL;
+    sciErr = NvSciBufAttrListIpcExportUnreconciled(&consumerReadAttrs,
+                                                   1,
+                                                   ipcWrapper.endpoint,
+                                                   &consumerReadAttrsDesc,
+                                                   &consumerReadAttrsSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to export consumer buffer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send size of consumer buffer attributes
+    sciErr = ipcSend(&ipcWrapper,
+                     &consumerReadAttrsSize,
+                     sizeof(consumerReadAttrsSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to send consumer buffer attrs size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send consumer buffer attributes
+    sciErr = ipcSend(&ipcWrapper,
+                     consumerReadAttrsDesc,
+                     consumerReadAttrsSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Unable to send consumer buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
+
+    // Wait to receive the size of the combined buffer attributes
+    size_t recvBufListSize = 0U;
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         &recvBufListSize,
+                         sizeof(recvBufListSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to receive combined buffer attr size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate a buffer big enough to receive the combined buffer attributes
+    recvBufListDesc = malloc(recvBufListSize);
+    if (NULL == recvBufListDesc) {
+        sciErr = NvSciError_InsufficientMemory;
+        fprintf(stderr, "Buffer attr allocation failed(%x)\n", sciErr);
+        goto done;
+    }
+
+    // Receive the combined buffer attributes
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         recvBufListDesc,
+                         recvBufListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to recv combined buffer attr desc (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Convert the combined buffer attributes to an attribute list
+    sciErr = NvSciBufAttrListIpcImportReconciled(bufModule,
+                                                 ipcWrapper.endpoint,
+                                                 recvBufListDesc,
+                                                 recvBufListSize,
+                                                 NULL,
+                                                 0,
+                                                 &combinedBufAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to import combined buffer attr (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Extract attributes needed by CUDA
+    if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
+        goto done;
+    }
+
+    // Receive all buffers
+    for (uint32_t i = 0U; i < totalBuffers; i++) {
+        Buffer* buf = &buffers[i];
+
+        // Receive the buffer's export descriptor and import the buffer
+        NvSciBufObjIpcExportDescriptor objDesc;
+        sciErr = ipcRecvFill(&ipcWrapper, &objDesc, sizeof(objDesc));
+        if (NvSciError_Success != sciErr) {
+            fprintf(stderr,
+                    "Unable to recv buffer %d from producer (%x)\n",
+                    i, sciErr);
+            goto done;
+        }
+        sciErr = NvSciBufObjIpcImport(ipcWrapper.endpoint,
+                                      &objDesc,
+                                      combinedBufAttrs,
+                                      NvSciBufAccessPerm_Readonly,
+                                      10000,
+                                      &buf->obj);
+        if (NvSciError_Success != sciErr) {
+            fprintf(stderr,
+                    "Unable to import buffer %d from producer (%x)\n",
+                    i, sciErr);
+            goto done;
+        }
+
+        // Import the buffer into CUDA
+        if (!setupCudaBuffer(&cudaInfo, buf)) {
+            goto done;
+        }
+
+        // Validate handles before entering runtime phase
+        {
+            NvSciBufAttrList reconciledList;
+
+            sciErr = NvSciBufObjGetAttrList(buf->obj, &reconciledList);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr,
+                        "Can't get the reconciled list from NvSciBufObj %d (%x)\n",
+                        i, sciErr);
+                goto done;
+            }
+
+            sciErr = NvSciBufAttrListValidateReconciledAgainstAttrs(reconciledList, bufKeyValue, 4);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr,
+                        "Validation of combinedBufAttrs list failed: %x\n", sciErr);
+                goto done;
+            }
+
+            sciErr = NvSciBufObjValidate(buf->obj);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr,
+                        "Validation of imported buffer %d failed (%x)\n",
+                        i, sciErr);
+                goto done;
+            }
+        }
+    }
+
+    fprintf(stderr, "Consumer buffers received and established\n");
+
+    // Receive all frames
+    uint32_t currFrame = 0;
+    uint32_t currBuffer = 0;
+    Packet packet;
+    while (currFrame < totalFrames) {
+        fprintf(stderr, "Consumer starting frame %d in buffer %d\n",
+                currFrame, currBuffer);
+        Buffer* buf = &buffers[currBuffer];
+
+        // Wait for buffer to be available
+        while (buf->owner != 1U) {
+
+            // Wait for next presented buffer
+            sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet));
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr,
+                        "Failure to recv buffer from producer (%x)\n",
+                        sciErr);
+                goto done;
+            }
+
+            // Import transmitted fence description to a fence
+            sciErr = NvSciSyncIpcImportFence(consumerWaitObj,
+                                             &packet.fenceDesc,
+                                             &buffers[packet.bufferId].fence);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr,
+                        "Failure to import fence from producer (%x)\n",
+                        sciErr);
+                goto done;
+            }
+
+            // Copy CRC data from packet
+            buffers[packet.bufferId].crc = packet.crc;
+
+            // Mark consumer as owner of this buffer
+            buffers[packet.bufferId].owner = 1U;
+        }
+
+        // Wait for fence generated by producer before reading
+        if (!waitCudaFence(&cudaInfo, buf)) {
+            goto done;
+        }
+
+        // Read the buffer to the local copy
+        cudaErr = cudaMemcpy2DAsync(cudaInfo.bufCopy,
+                                    cudaInfo.bufSize,
+                                    buf->ptr,
+                                    cudaInfo.bufSize,
+                                    cudaInfo.bufSize,
+                                    1,
+                                    cudaMemcpyDeviceToHost,
+                                    cudaInfo.stream);
+        if (cudaSuccess != cudaErr) {
+            fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr);
+            goto done;
+        }
+
+        // Wait for operation to finish, then compute and compare checksum
+        // IMPORTANT NOTE:
+        // A
normal stream application would not perform these steps. + // A checksum is not required for streaming, and waiting for + // operations to finish (which we only need because the + // checksum is calculated by the CPU) introduces bubbles + // in the hardware pipeline. A real application can rely on + // the generated NvSciSync fences for synchronization. + // These steps are only taken in this sample application + // because the consumer has no output visible to the user, + // so the checksum allows us to verify that the application + // is behaving properly. + cudaDeviceSynchronize(); + uint32_t crc = GenerateCRC(cudaInfo.bufCopy, + 1, + cudaInfo.bufSize, + cudaInfo.bufSize); + if (buf->crc != crc) { + fprintf(stderr, "Checksums don't match (%x vs %x)\n", + crc, buf->crc); + goto done; + } + + fprintf(stderr, "Consumer read frame %d in buffer %d\n", + currFrame, currBuffer); + + // Generate new fence indicating when reading has finished + if (!signalCudaFence(&cudaInfo, buf)) { + goto done; + } + + // Mark buffer as owned by producer now + buf->owner = 0U; + + // Export buffer index and fence for transmission over IPC + // There is no checksum for the return trip. + packet.bufferId = currBuffer; + packet.crc = 0U; + sciErr = NvSciSyncIpcExportFence(&buf->fence, + ipcWrapper.endpoint, + &packet.fenceDesc); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Unable to export consumer fence (%x)\n", sciErr); + goto done; + } + + // Send buffer index and fence to producer + sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Failure to send buffer to producer (%x)\n", + sciErr); + goto done; + } + + fprintf(stderr, "Consumer finished frame %d in buffer %d\n", + currFrame, currBuffer); + + // Advance buffer and frame + currBuffer = (currBuffer + 1U) % totalBuffers; + currFrame++; + } + + // Success + *(int*)arg = 0; +done: + // Free CUDA resources + deinitCudaBuffer(buffers, totalBuffers); + deinitCuda(&cudaInfo); + + // Free NvSci objects + if (NULL != consumerSignalAttrs) + NvSciSyncAttrListFree(consumerSignalAttrs); + if (NULL != consumerWaitAttrs) + NvSciSyncAttrListFree(consumerWaitAttrs); + if (NULL != sendWaitListDesc) + NvSciSyncAttrListFreeDesc(sendWaitListDesc); + if (NULL != producerWaitAttrs) + NvSciSyncAttrListFree(producerWaitAttrs); + if (NULL != consToProdAttrs) + NvSciSyncAttrListFree(consToProdAttrs); + if (NULL != syncConflictAttrs) + NvSciSyncAttrListFree(syncConflictAttrs); + if (NULL != consumerSignalObj) + NvSciSyncObjFree(consumerSignalObj); + if (NULL != sendObjAndListDesc) + NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc); + if (NULL != consumerWaitObj) + NvSciSyncObjFree(consumerWaitObj); + if (NULL != consumerReadAttrs) + NvSciBufAttrListFree(consumerReadAttrs); + if (NULL != consumerReadAttrsDesc) + NvSciBufAttrListFreeDesc(consumerReadAttrsDesc); + if (NULL != combinedBufAttrs) + NvSciBufAttrListFree(combinedBufAttrs); + + // Free malloc'd resources + if (NULL != recvWaitListDesc) + free(recvWaitListDesc); + if (NULL != recvObjAndListDesc) + free(recvObjAndListDesc); + if (NULL != recvBufListDesc) + free(recvBufListDesc); + + fprintf(stderr, "Consumer exiting\n"); + return NULL; +} diff --git a/rawstream/rawstream_cuda.c b/rawstream/rawstream_cuda.c new file mode 100644 index 0000000..b0d6e42 --- /dev/null +++ b/rawstream/rawstream_cuda.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2020-2021 NVIDIA Corporation. All Rights Reserved. 
+ * + * NVIDIA Corporation and its licensors retain all intellectual property and + * proprietary rights in and to this software and related documentation. Any + * use, reproduction, disclosure or distribution of this software and related + * documentation without an express license agreement from NVIDIA Corporation + * is strictly prohibited. + */ + +#include "rawstream.h" + +// Initialize CUDA info +bool setupCuda(CudaClientInfo* info) +{ + int cudaErr; + + info->deviceId = 0; + info->stream = NULL; + info->signalerSem = NULL; + info->waiterSem = NULL; + info->bufCopy = NULL; + + int numOfGPUs = 0; + cudaErr = cudaGetDeviceCount(&numOfGPUs); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Failed to get compute-capable devices (%d)\n", cudaErr); + return false; + } + + cudaErr = cudaSetDevice(info->deviceId); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Failed to set CUDA device (%d)\n", cudaErr); + return false; + } + + cudaErr = cuDeviceGetUuid(&info->uuid, info->deviceId); + if (CUDA_SUCCESS != cudaErr) { + fprintf(stderr, "Failed to query CUDA UUID (%d)\n", cudaErr); + return false; + } + + return true; +} + +// Create CUDA sync objects and map to imported NvSciSync +bool setupCudaSync(CudaClientInfo* info, + NvSciSyncObj sciSignalObj, + NvSciSyncObj sciWaitObj) +{ + cudaExternalSemaphoreHandleDesc extSemDesc; + int cudaErr; + + // Create CUDA stream for signaling and waiting + cudaErr = cudaStreamCreateWithFlags(&info->stream, + cudaStreamNonBlocking); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Unable to create CUDA stream (%d)\n", + cudaErr); + return false; + } + + // Import signaler sync object to CUDA semaphore + memset(&extSemDesc, 0, sizeof(extSemDesc)); + extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; + extSemDesc.handle.nvSciSyncObj = sciSignalObj; + cudaErr = cudaImportExternalSemaphore(&info->signalerSem, &extSemDesc); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Unable to import signal sync object to CUDA (%d)\n", + cudaErr); + return false; + } + + // Import waiter sync object to CUDA semaphore + memset(&extSemDesc, 0, sizeof(extSemDesc)); + extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; + extSemDesc.handle.nvSciSyncObj = sciWaitObj; + cudaErr = cudaImportExternalSemaphore(&info->waiterSem, &extSemDesc); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Unable to import wait sync object to CUDA (%d)\n", + cudaErr); + return false; + } + + return true; +} + +// Extract info from buffer attributes needed by CUDA +bool setupCudaBufAttr(CudaClientInfo* info, + NvSciBufAttrList attrs) +{ + NvSciBufAttrKeyValuePair queryKeyValue[] = { + { NvSciBufGeneralAttrKey_Types, NULL, 0 }, + { NvSciBufRawBufferAttrKey_Size, NULL, 0 }, + }; + + NvSciError sciErr = NvSciBufAttrListGetAttrs(attrs, queryKeyValue, 2); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Unable to query buffer type/size (%x)\n", sciErr); + return false; + } + + // TODO: Original sample queries BufType but doesn't seem to do anything + // with it. Might not be needed. 
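+    // (The value pointers returned by NvSciBufAttrListGetAttrs reference
+    // memory owned by the attribute list, so they are only read here and
+    // never freed.)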
+ info->bufType = *((NvSciBufType*)(queryKeyValue[0].value)); + info->bufSize = *((uint64_t*)(queryKeyValue[1].value)); + + // Allocate storage for a copy of the buffer contents + info->bufCopy = (uint8_t*)malloc(info->bufSize); + if (NULL == info->bufCopy) { + fprintf(stderr, "Unable to allocate buffer copy\n"); + return false; + } + (void)memset(info->bufCopy, 0, info->bufSize); + + return true; +} + +// Import NvSciBuf into CUDA +bool setupCudaBuffer(CudaClientInfo* info, + Buffer* buf) +{ + int cudaErr; + + // Import buffer to cuda as external memory + cudaExternalMemoryHandleDesc memHandleDesc; + memset(&memHandleDesc, 0, sizeof(memHandleDesc)); + memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; + memHandleDesc.handle.nvSciBufObject = buf->obj; + memHandleDesc.size = info->bufSize; + + cudaErr = cudaImportExternalMemory(&buf->extMem, &memHandleDesc); + if (cudaSuccess != cudaErr) { + fprintf(stderr, + "Unable to import buffer to CUDA (%d)\n", + cudaErr); + return false; + } + + // Map to cuda memory buffer + cudaExternalMemoryBufferDesc bufferDesc; + memset(&bufferDesc, 0, sizeof(bufferDesc)); + bufferDesc.size = info->bufSize; + bufferDesc.offset = 0; + cudaErr = cudaExternalMemoryGetMappedBuffer((void *)&buf->ptr, + buf->extMem, + &bufferDesc); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Unable to map CUDA buffer (%d)\n", cudaErr); + return false; + } + + return true; +} + +// Tell CUDA to wait for the fence associated with a buffer +bool waitCudaFence(CudaClientInfo* info, + Buffer* buf) +{ + cudaExternalSemaphoreWaitParams waitParams; + memset(&waitParams, 0, sizeof(waitParams)); + waitParams.params.nvSciSync.fence = &buf->fence; + waitParams.flags = 0; + int cudaErr = cudaWaitExternalSemaphoresAsync(&info->waiterSem, + &waitParams, + 1, + info->stream); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Unable to wait for fence (%d)\n", cudaErr); + return false; + } + + NvSciSyncFenceClear(&buf->fence); + + return true; +} + +// Tell CUDA to generate a fence for a buffer +bool signalCudaFence(CudaClientInfo* info, + Buffer* buf) +{ + cudaExternalSemaphoreSignalParams signalParams; + memset(&signalParams, 0, sizeof(signalParams)); + signalParams.params.nvSciSync.fence = &buf->fence; + signalParams.flags = 0; + + int cudaErr = cudaSignalExternalSemaphoresAsync(&info->signalerSem, + &signalParams, + 1, + info->stream); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Unable to signal fence (%d)\n", cudaErr); + return false; + } + + return true; +} + +void deinitCuda(CudaClientInfo* info) +{ + if (NULL != info->bufCopy) { + free(info->bufCopy); + info->bufCopy = NULL; + } + if (NULL != info->signalerSem) { + (void)cudaDestroyExternalSemaphore(info->signalerSem); + info->signalerSem = NULL; + } + if (NULL != info->waiterSem) { + (void)cudaDestroyExternalSemaphore(info->waiterSem); + info->waiterSem = NULL; + } + if (NULL != info->stream) { + (void)cudaStreamDestroy(info->stream); + info->stream = NULL; + } +} + +void deinitCudaBuffer(Buffer* buf, int num) +{ + int i; + for (i = 0; i < num; ++i) { + if (NULL != buf[i].ptr) + cudaFree(buf[i].ptr); + if (NULL != buf[i].extMem) + (void)cudaDestroyExternalMemory(buf[i].extMem); + if (NULL != buf[i].obj) + NvSciBufObjFree(buf[i].obj); + } +} + diff --git a/rawstream/rawstream_ipc_linux.c b/rawstream/rawstream_ipc_linux.c new file mode 100644 index 0000000..25dc1e7 --- /dev/null +++ b/rawstream/rawstream_ipc_linux.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2020-2022 NVIDIA Corporation. All Rights Reserved. 
+ *
+ * NVIDIA Corporation and its licensors retain all intellectual property and
+ * proprietary rights in and to this software and related documentation. Any
+ * use, reproduction, disclosure or distribution of this software and related
+ * documentation without an express license agreement from NVIDIA Corporation
+ * is strictly prohibited.
+ */
+
+#include "rawstream.h"
+
+// Initialize one end of named communication channel
+NvSciError ipcInit(const char* endpointName, IpcWrapper* ipcWrapper)
+{
+    NvSciError err = NvSciError_Success;
+
+    // Open named endpoint
+    err = NvSciIpcOpenEndpoint(endpointName, &ipcWrapper->endpoint);
+    if (err != NvSciError_Success) {
+        fprintf(stderr, "Unable to open endpoint %s (%x)\n",
+                endpointName, err);
+        goto fail;
+    }
+
+    // Initialize IPC event notifier
+    err = NvSciIpcGetLinuxEventFd(ipcWrapper->endpoint, &ipcWrapper->ipcEventFd);
+    if (err != NvSciError_Success) {
+        fprintf(stderr, "Unable to get Linux event fd (%x)\n", err);
+        goto fail;
+    }
+
+    // Retrieve endpoint info
+    err = NvSciIpcGetEndpointInfo(ipcWrapper->endpoint, &ipcWrapper->info);
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "Unable to retrieve IPC endpoint info (%x)\n", err);
+        goto fail;
+    }
+
+    err = NvSciIpcResetEndpointSafe(ipcWrapper->endpoint);
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "Unable to reset IPC endpoint (%x)\n", err);
+    }
+
+fail:
+    return err;
+}
+
+// Clean up IPC when done
+void ipcDeinit(IpcWrapper* ipcWrapper)
+{
+    NvSciError err = NvSciIpcCloseEndpointSafe(ipcWrapper->endpoint, false);
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "NvSciIpcCloseEndpointSafe failed (%x)\n", err);
+    }
+}
+
+// Wait for an event on IPC channel
+static NvSciError waitEvent(IpcWrapper* ipcWrapper, uint32_t value)
+{
+    fd_set rfds;
+    uint32_t event = 0;
+    NvSciError err;
+
+    while (true) {
+        // Get pending IPC events
+        err = NvSciIpcGetEventSafe(ipcWrapper->endpoint, &event);
+        if (NvSciError_Success != err) {
+            fprintf(stderr, "NvSciIpcGetEventSafe failed (%x)\n", err);
+            return err;
+        }
+        // Return if event is the kind we're looking for
+        if (0U != (event & value)) {
+            break;
+        }
+
+        FD_ZERO(&rfds);
+        FD_SET(ipcWrapper->ipcEventFd, &rfds);
+
+        // Wait for a signal indicating a new event
+        if (select(ipcWrapper->ipcEventFd + 1, &rfds, NULL, NULL, NULL) < 0) {
+            // select failed
+            return NvSciError_ResourceError;
+        }
+        if (!FD_ISSET(ipcWrapper->ipcEventFd, &rfds)) {
+            return NvSciError_NvSciIpcUnknown;
+        }
+    }
+    return NvSciError_Success;
+}
+
+// Send a message over IPC
+NvSciError ipcSend(IpcWrapper* ipcWrapper, const void* buf, const size_t size)
+{
+    NvSciError err = NvSciError_Success;
+    bool done = false;
+    uint32_t bytes;
+
+    // Loop until entire message sent
+    while (done == false) {
+
+        // Wait for room in channel to send a message
+        err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_WRITE);
+        if (NvSciError_Success != err) {
+            goto fail;
+        }
+
+        assert(size <= UINT32_MAX);
+
+        // Send as much of the message as we can
+        err = NvSciIpcWriteSafe(ipcWrapper->endpoint, buf, (uint32_t)size,
+                                &bytes);
+        if (NvSciError_Success != err) {
+            fprintf(stderr, "IPC write failed (%x)\n", err);
+            goto fail;
+        }
+
+        // For this simple sample, we just fail if the entire message wasn't
+        // sent. Could instead retry to send the rest.
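+        // (Such a retry would advance the data pointer by "bytes" and
+        // reduce the remaining size by "bytes" before looping again.)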
+ if (size != (size_t)bytes) { + fprintf(stderr, "Failed to send entire message (%u < %zu)\n", + bytes, size); + err = NvSciError_NvSciIpcUnknown; + goto fail; + } + done = true; + } + +fail: + return err; +} + +// Receive a message over IPC +NvSciError ipcRecvFill(IpcWrapper* ipcWrapper, void* buf, const size_t size) +{ + NvSciError err = NvSciError_Success; + bool done = false; + uint32_t bytes; + + // Loop until entire message received + while (done == false) { + + // Wait for incoming data + err = waitEvent(ipcWrapper, NV_SCI_IPC_EVENT_READ); + if (NvSciError_Success != err) { + goto fail; + } + + assert(size <= UINT32_MAX); + + // Read as much of the message as we can + err = NvSciIpcReadSafe(ipcWrapper->endpoint, buf, (uint32_t)size, + &bytes); + if (NvSciError_Success != err) { + fprintf(stderr, "IPC read failed (%x)\n", err); + goto fail; + } + + // For this simple sample, we just fail if the entire message wasn't + // read. Could instead retry to receive the rest. + if (size != (size_t)bytes) { + fprintf(stderr, "Failed to read entire message (%u < %zu)\n", + bytes, size); + err = NvSciError_NvSciIpcUnknown; + goto fail; + } + done = true; + } + +fail: + return err; +} diff --git a/rawstream/rawstream_main.c b/rawstream/rawstream_main.c new file mode 100644 index 0000000..bb324a4 --- /dev/null +++ b/rawstream/rawstream_main.c @@ -0,0 +1,230 @@ +//! \file +//! \brief NvStreams rawstream main file. +//! +//! \copyright +//! SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +//! SPDX-License-Identifier: LicenseRef-NvidiaProprietary +//! +//! NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +//! property and proprietary rights in and to this material, related +//! documentation and any modifications thereto. Any use, reproduction, +//! disclosure or distribution of this material and related documentation +//! without an express license agreement from NVIDIA CORPORATION or +//! its affiliates is strictly prohibited. 
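+//!
+//! This file parses the command-line options, opens the NvSciSync and
+//! NvSciBuf modules, establishes the NvSciIpc channel, and exchanges a
+//! simple handshake before the producer or consumer work begins.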
+
+#include "rawstream.h"
+#include <unistd.h>    /* getopt(), optind */
+
+
+NvSciSyncModule syncModule = NULL;
+NvSciBufModule bufModule = NULL;
+
+NvSciSyncAttrList producerSignalAttrs = NULL;
+NvSciSyncAttrList consumerSignalAttrs = NULL;
+NvSciSyncAttrList producerWaitAttrs = NULL;
+NvSciSyncAttrList consumerWaitAttrs = NULL;
+NvSciSyncAttrList prodToConsAttrs = NULL;
+NvSciSyncAttrList consToProdAttrs = NULL;
+NvSciSyncObj consumerSignalObj = NULL;
+NvSciSyncObj producerSignalObj = NULL;
+NvSciSyncObj consumerWaitObj = NULL;
+NvSciSyncObj producerWaitObj = NULL;
+
+NvSciBufAttrList producerWriteAttrs = NULL;
+NvSciBufAttrList consumerReadAttrs = NULL;
+NvSciBufAttrList combinedBufAttrs = NULL;
+Buffer buffers[totalBuffers];
+IpcWrapper ipcWrapper;
+
+int late_attach = 0;
+int main(int argc, char *argv[])
+{
+    NvSciError err;
+    int producer;
+    const char* endpoint = "Unknown";
+    int ret = 0;
+
+    int opt;
+    producer = -1;
+    while ((opt = getopt(argc, argv, "pcl")) != -1) {
+        switch (opt) {
+        case 'p':
+            producer = 1;
+            endpoint = "Producer";
+            break;
+        case 'c':
+            producer = 0;
+            endpoint = "Consumer";
+            break;
+        case 'l':
+            late_attach = 1;
+            break;
+        default:
+            fprintf(stderr, "Unknown option: '%c'\n", opt);
+        }
+    }
+
+    if (producer == -1 || optind < argc) {
+        fprintf(stderr, "Usage: ./rawstream [-l] {-p or -c}\n");
+        fprintf(stderr,
+                "-p denotes producer, -c denotes consumer, -l denotes late-attach\n");
+        fprintf(stderr,
+                "either -p or -c must be provided; -l is optional\n");
+        return 1;
+    }
+
+    fprintf(stderr, "%s application starting\n", endpoint);
+
+    // Open sync module (shared by all threads)
+    err = NvSciSyncModuleOpen(&syncModule);
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s unable to open sync module (%x)\n",
+                endpoint, err);
+        return 1;
+    }
+
+    // Open buf module (shared by all threads)
+    err = NvSciBufModuleOpen(&bufModule);
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s unable to open buf module (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto close_sync_module;
+    }
+
+    // Initialize IPC library
+    err = NvSciIpcInit();
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s unable to init ipc library (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto close_buf_module;
+    }
+
+    // Establish IPC communications based on endpoint
+    // TODO: Settle on final IPC channel names
+    if (producer == 1) {
+        err = ipcInit("nvscisync_a_0", &ipcWrapper);
+    } else {
+        err = ipcInit("nvscisync_a_1", &ipcWrapper);
+    }
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s unable to initialize communication (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto deinit_IPC;
+    }
+
+    // Test communication by exchanging a simple handshake message
+    const int send_handshake = 12345;
+    err = ipcSend(&ipcWrapper, &send_handshake, sizeof(send_handshake));
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s failed to send handshake (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto deinit_IPC;
+    }
+
+    int recv_handshake = 0;
+    err = ipcRecvFill(&ipcWrapper, &recv_handshake, sizeof(recv_handshake));
+    if (NvSciError_Success != err) {
+        fprintf(stderr, "%s failed to receive handshake (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto deinit_IPC;
+    }
+
+    if (send_handshake != recv_handshake) {
+        fprintf(stderr, "%s handshake did not match (%x)\n",
+                endpoint, err);
+        ret = 1;
+        goto deinit_IPC;
+    }
+
+    // Initialize buffer list
+    for (uint32_t i=0; i 0; j--) {
+                if (tmp & 1) {
+                    tmp = (tmp >> 1) ^ CRC32_POLYNOMIAL;
+                } else {
+                    tmp >>= 1;
+                }
+            }
+            crcTable[i] = tmp;
+        }
+        initialized = 1;
+    }
+
+    //Calculate CRC for the data
+    for (y = 0U; y < height; y++) {
+        for (x = 0U; x < width; x++) {
+            tmp = (crc >> 8) & 0x00FFFFFFL;
+            crc = tmp ^ crcTable[((uint32_t) crc ^ *(data_ptr + x)) & 0xFF];
+        }
+        data_ptr += pitch;
+    }
+
+    return crc;
+}
diff --git a/rawstream/rawstream_producer.c b/rawstream/rawstream_producer.c
new file mode 100644
index 0000000..b251286
--- /dev/null
+++ b/rawstream/rawstream_producer.c
@@ -0,0 +1,752 @@
+//! \file
+//! \brief NvStreams rawstream producer file.
+//!
+//! \copyright
+//! SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//! SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+//!
+//! NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+//! property and proprietary rights in and to this material, related
+//! documentation and any modifications thereto. Any use, reproduction,
+//! disclosure or distribution of this material and related documentation
+//! without an express license agreement from NVIDIA CORPORATION or
+//! its affiliates is strictly prohibited.
+
+#include "rawstream.h"
+
+extern int late_attach;
+
+void* producerFunc(void* arg)
+{
+    CudaClientInfo cudaInfo;
+    NvSciError sciErr;
+    int cudaErr;
+    void* recvWaitListDesc = NULL;
+    void* recvObjAndListDesc = NULL;
+    void* consumerReadAttrsDesc = NULL;
+    // The following are freed in the cleanup code at "done", so they must
+    // be initialized here, before the first "goto done", rather than at
+    // the point of first use.
+    void* sendWaitListDesc = NULL;
+    void* sendObjAndListDesc = NULL;
+    void* sendBufListDesc = NULL;
+    NvSciSyncAttrList syncConflictAttrs = NULL;
+
+    *(int*)arg = 1;
+    fprintf(stderr, "Producer starting\n");
+
+    // Do common cuda initialization
+    if (!setupCuda(&cudaInfo)) {
+        goto done;
+    }
+
+    // Create an empty sync attribute list for signaling permissions.
+    sciErr = NvSciSyncAttrListCreate(syncModule, &producerSignalAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to create producer signal attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Query CUDA for attributes needed to signal syncs
+    cudaErr = cudaDeviceGetNvSciSyncAttributes(producerSignalAttrs,
+                                               cudaInfo.deviceId,
+                                               cudaNvSciSyncAttrSignal);
+    if (cudaSuccess != cudaErr) {
+        fprintf(stderr,
+                "Could not query signal attributes from CUDA (%d)\n",
+                cudaErr);
+        goto done;
+    }
+
+    fprintf(stderr, "Producer signal attributes established\n");
+
+    // Create an empty sync attribute list for waiting permissions.
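+    // The wait list is exported to the consumer, which reconciles it with
+    // its own signal attributes to create the consumer->producer sync
+    // object; the signal list above is reconciled locally with the
+    // consumer's wait attributes for the producer->consumer direction.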
+    sciErr = NvSciSyncAttrListCreate(syncModule, &producerWaitAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to create producer wait attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Query CUDA for attributes needed to wait for syncs
+    cudaErr = cudaDeviceGetNvSciSyncAttributes(producerWaitAttrs,
+                                               cudaInfo.deviceId,
+                                               cudaNvSciSyncAttrWait);
+    if (cudaSuccess != cudaErr) {
+        fprintf(stderr,
+                "Could not query wait attributes from CUDA (%d)\n",
+                cudaErr);
+        goto done;
+    }
+
+    fprintf(stderr, "Producer wait attributes established\n");
+
+    // Export producer's wait attributes to a form suitable for IPC
+    size_t sendWaitAttrListSize = 0U;
+    sciErr = NvSciSyncAttrListIpcExportUnreconciled(&producerWaitAttrs,
+                                                    1,
+                                                    ipcWrapper.endpoint,
+                                                    &sendWaitListDesc,
+                                                    &sendWaitAttrListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to export producer wait attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the size of the producer's wait attributes to the consumer,
+    // so it knows how much data to expect
+    sciErr = ipcSend(&ipcWrapper,
+                     &sendWaitAttrListSize,
+                     sizeof(sendWaitAttrListSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to send producer wait attrs size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the exported form of the producer's wait attributes
+    sciErr = ipcSend(&ipcWrapper,
+                     sendWaitListDesc,
+                     sendWaitAttrListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to send producer wait attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Wait to receive the size of the consumer's wait attributes
+    size_t recvWaitAttrListSize = 0U;
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         &recvWaitAttrListSize,
+                         sizeof(recvWaitAttrListSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to recv consumer wait attr size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate a buffer big enough to receive the consumer's wait attributes
+    recvWaitListDesc = malloc(recvWaitAttrListSize);
+    if (NULL == recvWaitListDesc) {
+        sciErr = NvSciError_InsufficientMemory;
+        fprintf(stderr,
+                "Sync attr allocation failed (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Wait to receive consumer's wait attributes
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         recvWaitListDesc,
+                         recvWaitAttrListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to recv consumer wait attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Convert the received consumer wait attributes to an attribute list
+    sciErr = NvSciSyncAttrListIpcImportUnreconciled(syncModule,
+                                                    ipcWrapper.endpoint,
+                                                    recvWaitListDesc,
+                                                    recvWaitAttrListSize,
+                                                    &consumerWaitAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to import consumer wait attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Get combined attributes for producer to consumer signaling
+    NvSciSyncAttrList syncAllAttrs[2];
+    syncAllAttrs[0] = producerSignalAttrs;
+    syncAllAttrs[1] = consumerWaitAttrs;
+    sciErr = NvSciSyncAttrListReconcile(syncAllAttrs,
+                                        2,
+                                        &prodToConsAttrs,
+                                        &syncConflictAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't merge producer->consumer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate producer to consumer sync object
+    sciErr = NvSciSyncObjAlloc(prodToConsAttrs, &producerSignalObj);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't allocate producer->consumer sync (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Export sync attributes and object to a form suitable for IPC
+    size_t sendObjAndListSize = 0U;
+    sciErr = NvSciSyncIpcExportAttrListAndObj(producerSignalObj,
+                                              NvSciSyncAccessPerm_WaitOnly,
+                                              ipcWrapper.endpoint,
+                                              &sendObjAndListDesc,
+                                              &sendObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't export producer->consumer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the size of the sync description to the consumer,
+    // so it knows how much data to expect
+    sciErr = ipcSend(&ipcWrapper, &sendObjAndListSize, sizeof(size_t));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't send producer->consumer sync description size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the sync description to the consumer
+    sciErr = ipcSend(&ipcWrapper, sendObjAndListDesc, sendObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't send producer->consumer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Wait to receive the size of the consumer->producer sync description
+    size_t recvObjAndListSize = 0U;
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         &recvObjAndListSize,
+                         sizeof(size_t));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't recv consumer->producer sync description size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate a buffer big enough to receive the description
+    recvObjAndListDesc = malloc(recvObjAndListSize);
+    if (NULL == recvObjAndListDesc) {
+        sciErr = NvSciError_InsufficientMemory;
+        fprintf(stderr,
+                "Sync description allocation failed (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Wait to receive consumer->producer sync description
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         recvObjAndListDesc,
+                         recvObjAndListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't receive consumer->producer sync description (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Convert the received consumer->producer sync description to a
+    // sync attribute list and object
+    sciErr = NvSciSyncIpcImportAttrListAndObj(syncModule,
+                                              ipcWrapper.endpoint,
+                                              recvObjAndListDesc,
+                                              recvObjAndListSize,
+                                              &producerWaitAttrs,
+                                              1,
+                                              NvSciSyncAccessPerm_WaitOnly,
+                                              ipcWrapper.endpoint,
+                                              &producerWaitObj);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Can't import consumer->producer sync (%x)\n", sciErr);
+        goto done;
+    }
+
+    // Set up CUDA sync objects, importing NvSciSync objects
+    if (!setupCudaSync(&cudaInfo, producerSignalObj, producerWaitObj)) {
+        goto done;
+    }
+
+    fprintf(stderr, "Producer exchanged sync objects with consumer\n");
+
+    // Create an empty buffer attribute list for producer buffers
+    sciErr = NvSciBufAttrListCreate(bufModule, &producerWriteAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to create producer buffer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Fill producer buffer attribute list with values
+    NvSciBufAttrKeyValuePair bufKeyValue[7];
+    NvSciRmGpuId gpuId;
+    memcpy(&gpuId.bytes, &cudaInfo.uuid.bytes, sizeof(cudaInfo.uuid.bytes));
+    bufKeyValue[0].key = NvSciBufGeneralAttrKey_GpuId;
+    bufKeyValue[0].value = &gpuId;
+    bufKeyValue[0].len = sizeof(gpuId);
+    NvSciBufType bufType = NvSciBufType_RawBuffer;
+    bufKeyValue[1].key = NvSciBufGeneralAttrKey_Types;
+    bufKeyValue[1].value = &bufType;
+    bufKeyValue[1].len = sizeof(bufType);
+    NvSciBufAttrValAccessPerm bufPerm = NvSciBufAccessPerm_ReadWrite;
+    bufKeyValue[2].key = NvSciBufGeneralAttrKey_RequiredPerm;
+    bufKeyValue[2].value = &bufPerm;
+    bufKeyValue[2].len = sizeof(bufPerm);
+    bool bufAccessFlag = true;
+    bufKeyValue[3].key = NvSciBufGeneralAttrKey_NeedCpuAccess;
+    bufKeyValue[3].value = &bufAccessFlag;
+    bufKeyValue[3].len = sizeof(bufAccessFlag);
+    uint64_t rawsize = (128 * 1024);
+    bufKeyValue[4].key = NvSciBufRawBufferAttrKey_Size;
+    bufKeyValue[4].value = &rawsize;
+    bufKeyValue[4].len = sizeof(rawsize);
+    uint64_t align = (4 * 1024);
+    bufKeyValue[5].key = NvSciBufRawBufferAttrKey_Align;
+    bufKeyValue[5].value = &align;
+    bufKeyValue[5].len = sizeof(align);
+    if (late_attach) {
+        // Add late peer location attribute
+        NvSciBufPeerLocationInfo peerLocation;
+        peerLocation.socID = NV_SCI_BUF_PEER_INFO_SELF_SOCID;
+        peerLocation.vmID = NV_SCI_BUF_PEER_INFO_SELF_VMID;
+        peerLocation.reserved = 0;
+        bufKeyValue[6].key = NvSciBufGeneralAttrKey_PeerLocationInfo;
+        bufKeyValue[6].value = &peerLocation;
+        bufKeyValue[6].len = sizeof(peerLocation);
+        sciErr = NvSciBufAttrListSetAttrs(producerWriteAttrs, bufKeyValue, 7);
+    } else {
+        sciErr = NvSciBufAttrListSetAttrs(producerWriteAttrs, bufKeyValue, 6);
+    }
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Unable to fill producer buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
+
+    fprintf(stderr, "Producer buffer attributes established\n");
+
+
+    if (late_attach) {
+        // We don't have the consumer's attributes yet.
+        // Just reconcile our own.
+        NvSciBufAttrList bufAllAttrs[2], bufConflictAttrs;
+        bufAllAttrs[0] = producerWriteAttrs;
+        // bufAllAttrs[1] = consumerReadAttrs;
+        sciErr = NvSciBufAttrListReconcile(bufAllAttrs, 1,
+                                           &combinedBufAttrs, &bufConflictAttrs);
+        if (NvSciError_Success != sciErr) {
+            fprintf(stderr, "Can't merge buffer attrs (%x)\n", sciErr);
+            goto done;
+        }
+
+        // Allocate all buffers
+        for (uint32_t i=0U; i<totalBuffers; i++) {
+            Buffer* buf = &buffers[i];
+            sciErr = NvSciBufObjAlloc(combinedBufAttrs, &buf->obj);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr, "Can't allocate buffer %d (%x)\n", i, sciErr);
+                goto done;
+            }
+        }
+    }
+
+    // Wait to receive the size of the consumer's buffer attributes
+    size_t consumerReadAttrsSize = 0U;
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         &consumerReadAttrsSize,
+                         sizeof(consumerReadAttrsSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to recv consumer buffer attr size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Allocate a buffer big enough to receive the consumer's buffer attributes
+    consumerReadAttrsDesc = malloc(consumerReadAttrsSize);
+    if (NULL == consumerReadAttrsDesc) {
+        sciErr = NvSciError_InsufficientMemory;
+        fprintf(stderr, "Buffer attr allocation failed (%x)\n", sciErr);
+        goto done;
+    }
+
+    // Wait to receive the consumer's buffer attributes
+    sciErr = ipcRecvFill(&ipcWrapper,
+                         consumerReadAttrsDesc,
+                         consumerReadAttrsSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Unable to recv consumer buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
+
+    // Convert the received consumer buffer attributes to an attribute list
+    sciErr = NvSciBufAttrListIpcImportUnreconciled(bufModule,
+                                                   ipcWrapper.endpoint,
+                                                   consumerReadAttrsDesc,
+                                                   consumerReadAttrsSize,
+                                                   &consumerReadAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to import consumer buffer attrs (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Get combined attributes for buffers
+    NvSciBufAttrList bufAllAttrs[2], bufConflictAttrs;
+    bufAllAttrs[0] = producerWriteAttrs;
+    bufAllAttrs[1] = consumerReadAttrs;
+    sciErr = NvSciBufAttrListReconcile(bufAllAttrs, 2,
+                                       &combinedBufAttrs, &bufConflictAttrs);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Can't merge buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
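+
+    // Reconciliation produces a single attribute list that satisfies both
+    // endpoints' constraints (size, alignment, permissions, CPU access);
+    // the buffers are allocated and exported with this combined list.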
+
+    // Export combined buffer attributes to a form suitable for IPC
+    sendBufListDesc = NULL;
+    size_t sendBufListSize = 0U;
+    sciErr = NvSciBufAttrListIpcExportReconciled(combinedBufAttrs,
+                                                 ipcWrapper.endpoint,
+                                                 &sendBufListDesc,
+                                                 &sendBufListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Can't export reconciled buffer attrs to consumer (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the size of the combined buffer attributes to the consumer,
+    // so it knows how much data to expect
+    sciErr = ipcSend(&ipcWrapper,
+                     &sendBufListSize,
+                     sizeof(sendBufListSize));
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr,
+                "Unable to send combined buffer attrs size (%x)\n",
+                sciErr);
+        goto done;
+    }
+
+    // Send the exported form of the combined buffer attributes
+    sciErr = ipcSend(&ipcWrapper,
+                     sendBufListDesc,
+                     sendBufListSize);
+    if (NvSciError_Success != sciErr) {
+        fprintf(stderr, "Unable to send combined buffer attrs (%x)\n", sciErr);
+        goto done;
+    }
+
+
+    // Extract attributes needed by CUDA
+    if (!setupCudaBufAttr(&cudaInfo, combinedBufAttrs)) {
+        goto done;
+    }
+
+    // Export all buffers
+    for (uint32_t i=0U; i<totalBuffers; i++) {
+        Buffer* buf = &buffers[i];
+
+        if (!late_attach) {
+            sciErr = NvSciBufObjAlloc(combinedBufAttrs, &buf->obj);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr, "Can't allocate buffer %d (%x)\n", i, sciErr);
+                goto done;
+            }
+        } else {
+            NvSciBufAttrList bufAllAttrs[1];
+            // Use the imported consumer attribute list.
+            bufAllAttrs[0] = consumerReadAttrs;
+            // Invoke NvSciBufObjAttachPeer() before exporting the NvSciBufObj
+            sciErr = NvSciBufObjAttachPeer(buf->obj, bufAllAttrs, 1);
+            if (NvSciError_Success != sciErr) {
+                fprintf(stderr, "NvSciBufObjAttachPeer call failed error: %x\n", sciErr);
+                goto done;
+            } else {
+                fprintf(stderr, "NvSciBufObjAttachPeer call succeeded\n");
+            }
+        }
+
+        // Export buffer object to a form suitable for IPC
+        // Note: Unlike attribute lists, the exported form of objects has
+        // a fixed size.
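+        // Because the descriptor is fixed-size, it can be sent directly,
+        // without the size message that preceded each attribute-list
+        // exchange above.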
+ NvSciBufObjIpcExportDescriptor objDesc; + sciErr = NvSciBufObjIpcExport(buf->obj, + NvSciBufAccessPerm_ReadWrite, + ipcWrapper.endpoint, + &objDesc); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Unable to export buffer %d object to consumer (%x)\n", + i, sciErr); + goto done; + } + + // Send the buffer description to the consumer + sciErr = ipcSend(&ipcWrapper, + &objDesc, + sizeof(NvSciBufObjIpcExportDescriptor)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Unable to send buffer %d (%x)\n", i, sciErr); + goto done; + } + + // Import the buffer into CUDA + if (!setupCudaBuffer(&cudaInfo, buf)) { + goto done; + } + + { + NvSciBufAttrList reconciledList; + sciErr = NvSciBufObjGetAttrList(buf->obj, &reconciledList); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Can't get the reconciled list from NvSciBufObj %d (%x)\n", i, sciErr); + goto done; + } + + sciErr = NvSciBufAttrListValidateReconciledAgainstAttrs( + reconciledList, bufKeyValue, 6); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Validation of combinedBufAttrs failed (%x)\n", sciErr); + goto done; + } + + sciErr = NvSciBufObjValidate(buf->obj); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Validation of buffer %d failed (%x)\n", i, sciErr); + goto done; + } + } + } + + fprintf(stderr, "Producer buffers established and transmitted\n"); + + // Validate handles before starting the runtime phase + { + NvSciSyncAttrList producerWaitList; + + sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs( + prodToConsAttrs, + NULL, + 0, + NvSciSyncAccessPerm_SignalOnly); + if (NvSciError_Success != sciErr) { + fprintf( + stderr, + "Validation of prodToCons list failed: %x\n", sciErr); + goto done; + } + + sciErr = NvSciSyncObjGetAttrList(producerWaitObj, + &producerWaitList); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Can't get the reconciled list from producer wait object (%x)\n", + sciErr); + goto done; + } + + sciErr = NvSciSyncAttrListValidateReconciledAgainstAttrs( + producerWaitList, + NULL, + 0, + NvSciSyncAccessPerm_WaitOnly); + if (NvSciError_Success != sciErr) { + fprintf( + stderr, + "Validation of imported reconciled producer wait list failed: %x\n", + sciErr); + goto done; + } + + sciErr = NvSciSyncObjValidate(producerWaitObj); + if (NvSciError_Success != sciErr) { + fprintf( + stderr, + "Validation of imported producer wait object failed: %x\n", + sciErr); + goto done; + } + } + + // Send all frames + uint32_t currFrame = 0; + uint32_t currBuffer = 0; + Packet packet; + while (currFrame < totalFrames) { + fprintf(stderr, "Producer starting frame %d in buffer %d\n", + currFrame, currBuffer); + Buffer* buf = &buffers[currBuffer]; + + // Wait for buffer to be available + // Note: On first frame for each buffer, the producer already owns + // it, so this is skipped. On subsequent frames it must wait + // for the buffer's return. 
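+        // owner == 0 means the producer holds the buffer; owner == 1 means
+        // the consumer does (see the owner assignments below).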
+ while (buf->owner != 0U) { + + // Wait for next returned buffer + sciErr = ipcRecvFill(&ipcWrapper, &packet, sizeof(packet)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Failure to recv buffer from consumer (%x)\n", + sciErr); + goto done; + } + + // Import transmitted fence description to a fence + sciErr = NvSciSyncIpcImportFence(producerWaitObj, + &packet.fenceDesc, + &buffers[packet.bufferId].fence); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Failure to import fence from consumer (%x)\n", + sciErr); + goto done; + } + + // Extract checksum from packet + buffers[packet.bufferId].crc = packet.crc; + + // Mark producer as owner of this buffer + buffers[packet.bufferId].owner = 0U; + } + + // Wait for fence returned by consumer before rendering + if (!waitCudaFence(&cudaInfo, buf)) { + goto done; + } + + // CUDA rendering to buffer + (void)memset(cudaInfo.bufCopy, (currFrame & 0xFF), cudaInfo.bufSize); + + cudaErr = cudaMemcpy2DAsync(buf->ptr, + cudaInfo.bufSize, + cudaInfo.bufCopy, + cudaInfo.bufSize, + cudaInfo.bufSize, + 1, + cudaMemcpyHostToDevice, + cudaInfo.stream); + if (cudaSuccess != cudaErr) { + fprintf(stderr, "Unable to initiate CUDA copy (%d)\n", cudaErr); + goto done; + } + + // Generate new fence for the sync object + if (!signalCudaFence(&cudaInfo, buf)) { + goto done; + } + + // Wait for operation to finish and compute checksum + // IMPORTANT NOTE: + // A normal stream application would not perform these steps. + // A checksum is not required for streaming, and waiting for + // operations to finish (which we only need because the + // checksum is calculated by the CPU) introduces bubbles + // in the hardware pipeline. A real application can rely on + // the generated NvSciSync fences for synchronization. + // These steps are only taken in this sample application + // because the consumer has no output visible to the user, + // so the checksum allows us to verify that the application + // is behaving properly. 
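+        // (A cudaStreamSynchronize(cudaInfo.stream) on the single stream
+        // used here would also be sufficient.)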
+ cudaDeviceSynchronize(); + buf->crc = GenerateCRC(cudaInfo.bufCopy, + 1, + cudaInfo.bufSize, + cudaInfo.bufSize); + + fprintf(stderr, "Producer wrote frame %d in buffer %d\n", + currFrame, currBuffer); + + // Mark buffer as owned by consumer now + buf->owner = 1U; + + // Export buffer index, checksum, and fence for transmission over IPC + packet.bufferId = currBuffer; + packet.crc = buf->crc; + sciErr = NvSciSyncIpcExportFence(&buf->fence, + ipcWrapper.endpoint, + &packet.fenceDesc); + if (NvSciError_Success != sciErr) { + fprintf(stderr, "Unable to export producer fence (%x)\n", sciErr); + goto done; + } + + // Send buffer index and fence to consumer + sciErr = ipcSend(&ipcWrapper, &packet, sizeof(packet)); + if (NvSciError_Success != sciErr) { + fprintf(stderr, + "Failure to send buffer to consumer (%x)\n", + sciErr); + goto done; + } + + fprintf(stderr, "Producer finished frame %d in buffer %d\n", + currFrame, currBuffer); + + // Advance buffer and frame + currBuffer = (currBuffer + 1U) % totalBuffers; + currFrame++; + } + + + // Success + *(int*)arg = 0; +done: + // Free CUDA resources + deinitCudaBuffer(buffers, totalBuffers); + deinitCuda(&cudaInfo); + + // Free NvSci objects + if (NULL != producerSignalAttrs) + NvSciSyncAttrListFree(producerSignalAttrs); + if (NULL != consumerWaitAttrs) + NvSciSyncAttrListFree(consumerWaitAttrs); + if (NULL != sendWaitListDesc) + NvSciSyncAttrListFreeDesc(sendWaitListDesc); + if (NULL != producerWaitAttrs) + NvSciSyncAttrListFree(producerWaitAttrs); + if (NULL != prodToConsAttrs) + NvSciSyncAttrListFree(prodToConsAttrs); + if (NULL != syncConflictAttrs) + NvSciSyncAttrListFree(syncConflictAttrs); + if (NULL != producerSignalObj) + NvSciSyncObjFree(producerSignalObj); + if (NULL != sendObjAndListDesc) + NvSciSyncAttrListAndObjFreeDesc(sendObjAndListDesc); + if (NULL != producerWaitObj) + NvSciSyncObjFree(producerWaitObj); + if (NULL != producerWriteAttrs) + NvSciBufAttrListFree(producerWriteAttrs); + if (NULL != consumerReadAttrs) + NvSciBufAttrListFree(consumerReadAttrs); + if (NULL != combinedBufAttrs) + NvSciBufAttrListFree(combinedBufAttrs); + if (NULL != sendBufListDesc) + NvSciBufAttrListFreeDesc(sendBufListDesc); + + // Free malloc'd resources + if (NULL != recvWaitListDesc) + free(recvWaitListDesc); + if (NULL != recvObjAndListDesc) + free(recvObjAndListDesc); + if (NULL != consumerReadAttrsDesc) + free(consumerReadAttrsDesc); + + fprintf(stderr, "Producer exiting\n"); + return NULL; +}