Files
nvsci_samples/event_sample_app/block_consumer_uc1.c
svcmobrel-release e42833bdf1 Updating prebuilts and/or headers
1158201e78094e9e866fa99095c9ffc2ec9f5a27 - event_sample_app/block_limiter.c
a71ed037f9d77d0944f40f54cf25db8180d007e2 - event_sample_app/block_queue.c
ced622a41d1a48dcb23e6a1a02ae9640ef9b837c - event_sample_app/util.h
a0bd135d707994a41ed3a4234b5f875a268fed4d - event_sample_app/Makefile
d7e42e2b6088ff4596abc7256eb018d757a4021e - event_sample_app/usecase1.h
f5e2aea98ba9264ee1068a700222dff8d5d5c7a4 - event_sample_app/block_c2c.c
dac99c442185b020fbdae07bfc1e7df78343eb83 - event_sample_app/block_info.h
5001f036389a4f7952cb4974dd3323908208ca30 - event_sample_app/event_loop_threads.c
8193be73ce0a488f62034cb87083cdf09f52cd5d - event_sample_app/block_pool.c
3a1013021a572887303fb6db245b5b01fe07e9a0 - event_sample_app/block_producer_uc1.c
6ff0f1c2d7ef2e2fa9ece6fdc850b58b87207526 - event_sample_app/block_returnsync.c
e0861e9fe5d160d47d758464146d7192f9c70a5f - event_sample_app/util.c
b5dd68bec3ae6f9049aad1cb5a86c3db4af02e17 - event_sample_app/block_presentsync.c
b52e34443ac441a9df48029de944aa0a50d1b101 - event_sample_app/event_loop_service.c
ef057870dade9af70656b37340e9bcad35d49380 - event_sample_app/block_multicast.c
65ffe5af6ae6bc0418f348167c473849d4697e47 - event_sample_app/block_ipc.c
44f6de348f8bdd5cb584b3e8cc4b05e9482dddd2 - event_sample_app/event_loop.h
40f949c4c37ab4aa4a84182b345f3de6fceab39b - event_sample_app/main.c
641e3634da873970b574b23a1024b2e7155b88ff - event_sample_app/block_consumer_uc1.c
d6bbd17599543f1760d87851150a12a2a842a24d - event_sample_app/block_common.c
1fbb82e2281bb2e168c87fd20903bbed898ca160 - rawstream/rawstream_cuda.c
458833ab233a725c067bf9b1fc60ef39872eee80 - rawstream/Makefile
3df4e5c00a3dc002ee9877e282bd28ffa87fa6f0 - rawstream/rawstream.h
d5ffeef3c7ad2af6f6f31385db7917b5ef9a7438 - rawstream/rawstream_ipc_linux.c
f28c1cd5fe26b6dc5930d5556b54364c9b91767c - rawstream/rawstream_main.c
2bed038ca070aa5dccd6b672a98f093340e829bb - rawstream/rawstream_producer.c
e26c09f1ad1a3a7d2c29dae1b38d3fd90c23af6e - rawstream/rawstream_consumer.c

Change-Id: Iec84f03094414392580a103d2bc94bac8ed86fbb
2024-05-02 11:24:59 -07:00

1177 lines
41 KiB
C

/* NvSciStream Event Loop Driven Sample App - consumer block for use case 1
*
* Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
/*
* This implements the consumer for use case 1: cuda to cuda streaming
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "nvscibuf.h"
#include "nvscisync.h"
#include "nvscistream.h"
#include "cuda.h"
#include "cuda_runtime_api.h"
#include "block_info.h"
#include "event_loop.h"
#include "usecase1.h"
#include "util.h"
/* Internal data structure used to track packets */
typedef struct {
/* The packet handle use for NvSciStream functions */
NvSciStreamPacket handle;
/* NvSci buffer object for the packet's data buffer */
NvSciBufObj dataObj;
/* CUDA external memory handle for the data buffer */
cudaExternalMemory_t dataExtMem;
/* CUDA device memory pointer for the data buffer */
void* dataDevMem;
/* Local system memory buffer used as the target for CUDA operations */
uint8_t* dataDstMem;
/* NvSci buffer object for the packet's CRC buffer */
NvSciBufObj crcObj;
/* Virtual address for the CRC buffer */
uint8_t const* crcPtr;
} ConsPacket;
/* Internal data used by the consumer block */
typedef struct {
/* Common block info */
BlockData common;
/* CUDA device ID and UUID */
int32_t cudaDeviceId;
CUuuid cudaUuid;
/* CUDA consumer stream */
cudaStream_t cudaStream;
/* CUDA sync attributes required for signaling */
NvSciSyncAttrList signalAttr;
/* CUDA sync attributes required for waiting */
NvSciSyncAttrList waiterAttr;
/* Sync object for CUDA to signal after processing data */
NvSciSyncObj signalObj;
/* CUDA semaphore mapped to sync object */
cudaExternalSemaphore_t signalSem;
/* Sync object to wait for before processing data */
NvSciSyncObj waiterObj;
/* CUDA semaphore mapped to sync object */
cudaExternalSemaphore_t waiterSem;
/* Element index chosen by pool for the CRC buffer */
uint32_t crcIndex;
/* Element index chosen by pool for the data buffer */
uint32_t dataIndex;
/* Size for data buffer after reconciling all requirements */
uint64_t dataSize;
/* Number of packets provided by pool */
uint32_t numPacket;
/* Information about each packet */
ConsPacket packets[MAX_PACKETS];
/* Number of payloads processed so far */
uint32_t counter;
} ConsData;
/* Free up the packet resources */
static void deletePacket(
ConsPacket* packet)
{
if (packet != NULL) {
if (packet->handle != NvSciStreamPacket_Invalid) {
/* Free CUDA memory mapping */
(void)cudaFree(packet->dataDevMem);
if (packet->dataExtMem) {
(void)cudaDestroyExternalMemory(packet->dataExtMem);
packet->dataExtMem = 0;
}
if (packet->dataDstMem) {
free(packet->dataDstMem);
packet->dataDstMem = NULL;
}
/* Free buffer objects */
if (packet->dataObj) {
NvSciBufObjFree(packet->dataObj);
packet->dataObj = NULL;
}
if (packet->crcObj) {
NvSciBufObjFree(packet->crcObj);
packet->crcObj = NULL;
}
}
/* Clear out packet information */
memset(packet, 0, sizeof(ConsPacket));
}
}
/* Free up consumer block resources */
static void deleteConsumer(
ConsData* consData)
{
/* Destroy block */
if (consData->common.block != 0) {
(void)NvSciStreamBlockDelete(consData->common.block);
consData->common.block = 0;
}
/* Free the packet resources */
for (uint32_t i=0;i<consData->numPacket; i++) {
deletePacket(&consData->packets[i]);
}
/* Free the sync objects */
if (consData->waiterObj != NULL) {
(void)cudaDestroyExternalSemaphore(consData->waiterSem);
consData->waiterSem = 0;
NvSciSyncObjFree(consData->waiterObj);
consData->waiterObj = NULL;
}
if (consData->signalObj != NULL) {
(void)cudaDestroyExternalSemaphore(consData->signalSem);
consData->signalSem = 0;
NvSciSyncObjFree(consData->signalObj);
consData->signalObj = NULL;
}
/* Destroy CUDA stream */
(void)cudaStreamDestroy(consData->cudaStream);
/* Free data */
free(consData);
}
/* Handle initialization of CUDA resources for consumer */
static int32_t handleConsumerInit(
ConsData* consData)
{
if (opts.endInfo) {
/* Query endpoint info from producer */
uint32_t size = INFO_SIZE;
char info[INFO_SIZE] = {0};
NvSciError err = NvSciStreamBlockUserInfoGet(
consData->common.block,
NvSciStreamBlockType_Producer, 0U,
ENDINFO_NAME_PROC,
&size, &info);
if (NvSciError_Success == err) {
printf("Producer info: %s\n", info);
} else if (NvSciError_StreamInfoNotProvided == err) {
printf("Info not provided by the producer\n");
} else {
printf("Failed (%x) to query the producer info\n", err);
return 0;
}
}
/*
* Init CUDA
*/
int32_t cudaRtErr;
CUresult cudaErr;
/* Get stack limit */
size_t unused;
cudaRtErr = cudaDeviceGetLimit(&unused, cudaLimitStackSize);
if (cudaSuccess != cudaRtErr) {
printf("Failed (%x) to get CUDA device limit\n", cudaRtErr);
return 0;
}
/* Set CUDA device */
consData->cudaDeviceId = 0;
cudaRtErr = cudaSetDevice(consData->cudaDeviceId);
if (cudaSuccess != cudaRtErr) {
printf("Failed (%x) to set CUDA device\n", cudaRtErr);
return 0;
}
/* Get UUID for CUDA device */
cudaErr = cuDeviceGetUuid(&consData->cudaUuid, consData->cudaDeviceId);
if (CUDA_SUCCESS != cudaErr) {
printf("Failed (%x) to get CUDA UUID\n", cudaErr);
return 0;
}
/* Get CUDA streams to be used for asynchronous operation */
cudaRtErr = cudaStreamCreateWithFlags(&consData->cudaStream,
cudaStreamNonBlocking);
if (cudaSuccess != cudaRtErr) {
printf("Failed (%x) to create CUDA stream\n", cudaRtErr);
return 0;
}
return 1;
}
/* Handle setup of supported buffer attributes */
static int32_t handleConsumerElemSupport(
ConsData* consData)
{
/*
* Note: To illustrate that NvSciStream producer and consumer do
* not need to specify the same set of element types, or use
* the same order for element types, the producer for this
* use case sends the CRC attributes first, followed by the
* primary data, while the consumer uses the opposite order.
* Our pool implementation will end up using the producer
* ordering, but that is not required either.
*/
NvSciError sciErr;
uint32_t bufName[2];
NvSciBufAttrList bufAttrs[2];
/*
* Data buffer requires read access by CPU and the GPU of the cuda
* device, and uses a raw data buffer. (Size is specified by producer.)
*/
NvSciBufAttrValAccessPerm dataPerm = NvSciBufAccessPerm_Readonly;
uint8_t dataCpu = 1U;
NvSciRmGpuId dataGpu = { 0 };
NvSciBufType dataBufType = NvSciBufType_RawBuffer;
memcpy(&dataGpu.bytes, &consData->cudaUuid.bytes, sizeof(dataGpu.bytes));
NvSciBufAttrKeyValuePair dataKeyVals[] = {
{ NvSciBufGeneralAttrKey_GpuId, &dataGpu, sizeof(dataGpu) },
{ NvSciBufGeneralAttrKey_Types, &dataBufType, sizeof(dataBufType) },
{ NvSciBufGeneralAttrKey_RequiredPerm, &dataPerm, sizeof(dataPerm) },
{ NvSciBufGeneralAttrKey_NeedCpuAccess, &dataCpu, sizeof(dataCpu) }
};
/* Create and fill attribute list for data buffer */
bufName[0] = ELEMENT_NAME_DATA;
sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[0]);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to create data attribute list\n", sciErr);
return 0;
}
sciErr = NvSciBufAttrListSetAttrs(bufAttrs[0],
dataKeyVals,
sizeof(dataKeyVals) /
sizeof(NvSciBufAttrKeyValuePair));
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to fill data attribute list\n", sciErr);
return 0;
}
/*
* CRC buffer requires read access by CPU, and uses a raw 64 byte
* data buffer with 1 byte alignment.
*/
NvSciBufAttrValAccessPerm crcPerm = NvSciBufAccessPerm_Readonly;
uint8_t crcCpu = 1U;
NvSciBufType crcBufType = NvSciBufType_RawBuffer;
uint64_t crcSize = 64U;
uint64_t crcAlign = 1U;
NvSciBufAttrKeyValuePair crcKeyVals[] = {
{ NvSciBufGeneralAttrKey_Types, &crcBufType, sizeof(crcBufType) },
{ NvSciBufRawBufferAttrKey_Size, &crcSize, sizeof(crcSize) },
{ NvSciBufRawBufferAttrKey_Align, &crcAlign, sizeof(crcAlign) },
{ NvSciBufGeneralAttrKey_RequiredPerm, &crcPerm, sizeof(crcPerm) },
{ NvSciBufGeneralAttrKey_NeedCpuAccess, &crcCpu, sizeof(crcCpu) }
};
/* Create and fill attribute list for CRC checksum buffer */
bufName[1] = ELEMENT_NAME_CRC;
sciErr = NvSciBufAttrListCreate(sciBufModule, &bufAttrs[1]);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to create CRC attribute list\n", sciErr);
return 0;
}
sciErr = NvSciBufAttrListSetAttrs(bufAttrs[1],
crcKeyVals,
sizeof(crcKeyVals) /
sizeof(NvSciBufAttrKeyValuePair));
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to fill CRC attribute list\n", sciErr);
return 0;
}
/*
* Inform stream of the attributes
* Once sent, the attribute lists are no longer needed
*/
for (uint32_t i=0; i<2U; ++i) {
sciErr = NvSciStreamBlockElementAttrSet(consData->common.block,
bufName[i], bufAttrs[i]);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to send element %d attribute\n",
sciErr, i);
return 0;
}
NvSciBufAttrListFree(bufAttrs[i]);
}
/* Indicate that all element information has been exported */
sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_ElementExport,
true);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to complete element export\n", sciErr);
return 0;
}
return 1;
}
/* Handle receipt of chosen element attributes */
static int32_t handleConsumerElemSetting(
ConsData* consData)
{
NvSciError err;
/*
* This application does not need to query the element count, because we
* know it is always 2. But we do so anyways to show how it is done.
*/
uint32_t count;
err = NvSciStreamBlockElementCountGet(consData->common.block,
NvSciStreamBlockType_Pool,
&count);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to query element count\n", err);
return 0;
}
if (2U != count) {
printf("Consumer received unexpected element count (%d)\n", count);
return 0;
}
/* Process all elements */
for (uint32_t i=0U; i<2U; ++i) {
/* Query element type and attributes */
uint32_t type;
NvSciBufAttrList bufAttr;
err = NvSciStreamBlockElementAttrGet(consData->common.block,
NvSciStreamBlockType_Pool, i,
&type, &bufAttr);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to query element attr %d\n", err, i);
return 0;
}
/* For data element, need to extract size and save index */
if (ELEMENT_NAME_DATA == type) {
consData->dataIndex = i;
NvSciBufAttrKeyValuePair keyVals[] = {
{ NvSciBufRawBufferAttrKey_Size, NULL, 0 }
};
err = NvSciBufAttrListGetAttrs(bufAttr, keyVals, 1);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to obtain buffer size\n", err);
return 0;
}
consData->dataSize = *((const uint64_t*)(keyVals[0].value));
/* Set waiter attributes for the asynchronous element. */
err = NvSciStreamBlockElementWaiterAttrSet(consData->common.block,
i,
consData->waiterAttr);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to send waiter attr for elem %d\n",
err, i);
return 0;
}
/* Once sent, the waiting attributes are no longer needed */
NvSciSyncAttrListFree(consData->waiterAttr);
consData->waiterAttr = NULL;
}
/* For CRC element, just need to save the index */
else if (ELEMENT_NAME_CRC == type) {
consData->crcIndex = i;
/* CRC element is a synchronous element.
* No need to set the waiter attr, which is NULL by default. */
}
/* Report any unknown element */
else {
printf("Consumer received unknown element type (%x)\n", type);
return 0;
}
/* Don't need to keep attribute list */
NvSciBufAttrListFree(bufAttr);
/*
* Indicate element will be used.
* This is the default, and we can omit this call in most applications,
* but we illustrate its use for applications that only use some
* of the buffers.
*/
err = NvSciStreamBlockElementUsageSet(consData->common.block, i, true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to indicate element %d is used\n",
err, i);
return 0;
}
}
/* Indicate that element import is complete */
err = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_ElementImport,
true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to complete element import\n", err);
return 0;
}
/* Indicate that waiter attribute export is done. */
err = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_WaiterAttrExport,
true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to complete waiter attr export\n", err);
return 0;
}
return 1;
}
/* Handle creation of a new packet */
static int32_t handleConsumerPacketCreate(
ConsData* consData)
{
NvSciError err;
/* Retrieve handle for packet pending creation */
NvSciStreamPacket handle;
err = NvSciStreamBlockPacketNewHandleGet(consData->common.block,
&handle);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to retrieve handle for the new packet\n",
err);
return 0;
}
/* Make sure there is room for more packets */
if (MAX_PACKETS <= consData->numPacket) {
printf("Consumer exceeded max packets\n");
err = NvSciStreamBlockPacketStatusSet(consData->common.block,
handle,
NvSciStreamCookie_Invalid,
NvSciError_Overflow);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to send packet status\n", err);
}
return 0;
}
/*
* Allocate the next entry in the array for the new packet.
* Use the array entry for the cookie
*/
ConsPacket* packet = &consData->packets[consData->numPacket++];
packet->handle = handle;
/* Retrieve all buffers and map into application
* Consumers can skip querying elements that they don't use.
* This use case has 2 elements.
*/
for (uint32_t index = 0U; index < 2U; index++) {
NvSciBufObj bufObj;
err = NvSciStreamBlockPacketBufferGet(consData->common.block,
handle,
index,
&bufObj);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to retrieve buffer (%lx/%d)\n",
err, handle, index);
return 0;
}
/* Handle mapping of data buffer */
NvSciError sciErr;
int32_t cudaRtErr;
if (index == consData->dataIndex) {
/* Save buffer object */
packet->dataObj = bufObj;
/* Map in the buffer as CUDA external memory */
struct cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = bufObj;
memHandleDesc.size = consData->dataSize;
cudaRtErr = cudaImportExternalMemory(&packet->dataExtMem,
&memHandleDesc);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to map buffer as external mem\n",
cudaRtErr);
return 0;
}
/* Map in the buffer as CUDA device memory */
struct cudaExternalMemoryBufferDesc memBufferDesc;
memset(&memBufferDesc, 0, sizeof(memBufferDesc));
memBufferDesc.size = consData->dataSize;
memBufferDesc.offset = 0;
cudaRtErr = cudaExternalMemoryGetMappedBuffer(&packet->dataDevMem,
packet->dataExtMem,
&memBufferDesc);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to map buffer as device mem\n",
cudaRtErr);
return 0;
}
/* Allocate normal memory to use as the target for the CUDA op */
packet->dataDstMem = (uint8_t*)malloc(consData->dataSize);
if (NULL == packet->dataDstMem) {
printf("Consumer failed to allocate target buffer\n");
return 0;
}
/* Fill in with initial values */
memset(packet->dataDstMem, 0xD0, consData->dataSize);
}
/* Handle mapping of CRC buffer */
else if (index == consData->crcIndex) {
/* Save buffer object */
packet->crcObj = bufObj;
/* Get a CPU pointer for the buffer from NvSci */
sciErr = NvSciBufObjGetConstCpuPtr(bufObj,
(void const**)&packet->crcPtr);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to map CRC buffer\n", sciErr);
return 0;
}
}
/* Shouldn't be any other index */
else {
printf("Consumer received buffer for unknown element (%d)\n",
index);
return 0;
}
}
/* Inform pool of success.
* Note: Could inform the pool of any of the failures above.
*/
err = NvSciStreamBlockPacketStatusSet(consData->common.block,
handle,
(NvSciStreamCookie)packet,
NvSciError_Success);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to inform pool of packet status\n", err);
return 0;
}
return 1;
}
/* Handle deletion of packet */
static void handleConsumerPacketDelete(
ConsData* consData)
{
/* Get the deleted packet cookie*/
NvSciStreamCookie cookie;
NvSciError err =
NvSciStreamBlockPacketOldCookieGet(consData->common.block,
&cookie);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to retrieve the deleted packet cookie\n",
err);
}
/* Get packet pointer */
ConsPacket* packet = (ConsPacket*)cookie;
/* Free up the packet resources */
deletePacket(packet);
}
/* Handle setup of supported sync attributes */
static int32_t handleConsumerSyncSupport(
ConsData* consData)
{
NvSciError sciErr;
int32_t cudaRtErr;
/*
* Create sync attribute list for signaling.
* This will be saved until we receive the producer's attributes
*/
sciErr = NvSciSyncAttrListCreate(sciSyncModule, &consData->signalAttr);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to allocate signal sync attrs\n", sciErr);
return 0;
}
/* Have CUDA fill the signaling attribute list */
cudaRtErr = cudaDeviceGetNvSciSyncAttributes(consData->signalAttr,
consData->cudaDeviceId,
cudaNvSciSyncAttrSignal);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to fill signal sync attrs\n", cudaRtErr);
return 0;
}
/* Create sync attribute list for waiting. */
sciErr = NvSciSyncAttrListCreate(sciSyncModule, &consData->waiterAttr);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to allocate waiter sync attrs\n", sciErr);
return 0;
}
/* Have CUDA fill the waiting attribute list */
cudaRtErr = cudaDeviceGetNvSciSyncAttributes(consData->waiterAttr,
consData->cudaDeviceId,
cudaNvSciSyncAttrWait);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to fill waiter sync attrs\n", cudaRtErr);
return 0;
}
return 1;
}
/* Handle creation and export of consumer sync object */
static int32_t handleConsumerSyncExport(
ConsData* consData)
{
NvSciError sciErr;
uint32_t cudaRtErr;
/* Process waiter attrs from all elements.
* As CRC element is a synchronous element,
* no need to query the sync object for it.
*/
NvSciSyncAttrList waiterAttr = NULL;
sciErr = NvSciStreamBlockElementWaiterAttrGet(consData->common.block,
consData->dataIndex,
&waiterAttr);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to query waiter attr\n",
sciErr);
return 0;
}
if (NULL == waiterAttr) {
printf("Consumer received NULL waiter attr for data elem\n");
return 0;
}
/* Indicate that waiter attribute import is done. */
sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_WaiterAttrImport,
true);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to complete waiter attr import\n",
sciErr);
return 0;
}
/*
* Merge and reconcile producer sync attrs with ours.
*/
NvSciSyncAttrList unreconciled[2] = {
consData->signalAttr,
waiterAttr };
NvSciSyncAttrList reconciled = NULL;
NvSciSyncAttrList conflicts = NULL;
sciErr = NvSciSyncAttrListReconcile(unreconciled, 2,
&reconciled, &conflicts);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to reconcile sync attributes\n", sciErr);
return 0;
}
/* Allocate sync object */
sciErr = NvSciSyncObjAlloc(reconciled, &consData->signalObj);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to allocate sync object\n", sciErr);
return 0;
}
/* Free the attribute lists */
NvSciSyncAttrListFree(consData->signalAttr);
consData->signalAttr = NULL;
NvSciSyncAttrListFree(waiterAttr);
NvSciSyncAttrListFree(reconciled);
/* Create CUDA semaphore for sync object */
struct cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = consData->signalObj;
cudaRtErr = cudaImportExternalSemaphore(&consData->signalSem,
&extSemDesc);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to map signal object to semaphore\n",
cudaRtErr);
return 0;
}
/* Only send the sync object for the asynchronous element.
* If this function is not called for an element,
* the sync object is assumed to be NULL.
* In this use case, CRC element doesn't use sync object.
*/
sciErr = NvSciStreamBlockElementSignalObjSet(consData->common.block,
consData->dataIndex,
consData->signalObj);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to send sync object\n", sciErr);
return 0;
}
/* Indicate that sync object export is complete */
sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_SignalObjExport,
true);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to complete signal obj export\n",
sciErr);
return 0;
}
return 1;
}
/* Handle import of producer sync object */
static int32_t handleConsumerSyncImport(
ConsData* consData)
{
uint32_t cudaRtErr;
NvSciError sciErr;
NvSciSyncObj waiterObj = NULL;
/* Query sync object for asynchronous elements. */
sciErr = NvSciStreamBlockElementSignalObjGet(consData->common.block,
0U, consData->dataIndex,
&waiterObj);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to query sync object\n", sciErr);
return 0;
}
/* Save object */
consData->waiterObj = waiterObj;
/* If the waiter sync obj is NULL,
* it means this element is ready to use when received.
*/
if (NULL != waiterObj) {
/* Create CUDA semaphore for sync object */
struct cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = waiterObj;
cudaRtErr = cudaImportExternalSemaphore(&consData->waiterSem,
&extSemDesc);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to map waiter object to semaphore\n",
cudaRtErr);
return 0;
}
}
/* Indicate that element import is complete */
sciErr = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_SignalObjImport,
true);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to complete signal obj import\n",
sciErr);
return 0;
}
return 1;
}
/* Handle processing of payloads */
static int32_t handleConsumerPayload(
ConsData* consData)
{
NvSciError sciErr;
int32_t cudaRtErr;
/* Obtain packet with the new payload */
NvSciStreamCookie cookie;
sciErr = NvSciStreamConsumerPacketAcquire(consData->common.block,
&cookie);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to obtain packet for payload\n", sciErr);
return 0;
}
ConsPacket* packet = (ConsPacket*)cookie;
consData->counter++;
/* Extract CRC value from the packet */
uint32_t crc = *((uint32_t const*)(packet->crcPtr));
/* If the received waiter obj if NULL,
* the producer is done writing data into this element,
* skip waiting on pre-fence.
*/
if (NULL != consData->waiterObj) {
/* Query fences from producer for data element */
NvSciSyncFence prefence = NvSciSyncFenceInitializer;
sciErr = NvSciStreamBlockPacketFenceGet(
consData->common.block,
packet->handle,
0U, consData->dataIndex,
&prefence);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to query fence from producer\n",
sciErr);
return 0;
}
/* Instruct CUDA to wait for the producer fence */
struct cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
waitParams.params.nvSciSync.fence = &prefence;
waitParams.flags = 0;
cudaRtErr = cudaWaitExternalSemaphoresAsync(&consData->waiterSem,
&waitParams, 1,
consData->cudaStream);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to wait for prefence\n", cudaRtErr);
return 0;
}
NvSciSyncFenceClear(&prefence);
}
/* Instruct CUDA to copy the packet data buffer to the target buffer */
cudaRtErr = cudaMemcpy2DAsync(packet->dataDstMem,
consData->dataSize,
packet->dataDevMem,
consData->dataSize,
consData->dataSize,
1,
cudaMemcpyDeviceToHost,
consData->cudaStream);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to issue copy command\n", cudaRtErr);
return 0;
}
/* Inform CUDA to signal a fence when the copy completes */
NvSciSyncFence postfence = NvSciSyncFenceInitializer;
struct cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
signalParams.params.nvSciSync.fence = &postfence;
signalParams.flags = 0;
cudaRtErr = cudaSignalExternalSemaphoresAsync(&consData->signalSem,
&signalParams,
1,
consData->cudaStream);
if (cudaSuccess != cudaRtErr) {
printf("Consumer failed (%x) to signal postfence\n", cudaRtErr);
return 0;
}
/* Update postfence for this element */
sciErr = NvSciStreamBlockPacketFenceSet(consData->common.block,
packet->handle,
consData->dataIndex,
&postfence);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to set fence\n", sciErr);
return 0;
}
/* Release the packet back to the producer */
sciErr = NvSciStreamConsumerPacketRelease(consData->common.block,
packet->handle);
if (NvSciError_Success != sciErr) {
printf("Consumer failed (%x) to release packet\n", sciErr);
return 0;
}
NvSciSyncFenceClear(&postfence);
/*
* Wait for the copy to finish, generate a checkum and, compare
* with the value from the packet
*/
cudaStreamSynchronize(consData->cudaStream);
if (crc != generateCRC(packet->dataDstMem,
1,
consData->dataSize,
consData->dataSize)) {
printf("Consumer CRC does not match the one from producer\n");
}
return 1;
}
/* Handle events on a consumer block
*
* The consumer block informs the stream of the consumers buffer and
* synchronization requirements and capabilities, creates signaling
* synchronization objects and receives synchronization objects to
* wait for, maps buffers and synchronization objects to the consumer
* engine(s), and processes data.
*/
static int32_t handleConsumer(
void* data,
uint32_t wait)
{
/* Cast to consumer data */
ConsData* consData = (ConsData*)data;
/* Get time to wait */
int64_t waitTime = wait ? consData->common.waitTime : 0;
/* Query/wait for an event on the block */
NvSciStreamEventType event;
NvSciError err;
err = NvSciStreamBlockEventQuery(consData->common.block, waitTime, &event);
/* Handle errors */
if (NvSciError_Success != err) {
/* If not asked to wait, a timeout is not an error */
if (!waitTime && (NvSciError_Timeout == err)) {
return 0;
}
/* Otherwise, any error is considered fatal. A timeout probably
* indicates a failure to connect and complete setup in a timely
* fashion, so we specifically call out this case.
*/
if (NvSciError_Timeout == err) {
printf("Consumer timed out waiting for setup instructions\n");
} else {
printf("Consumer event query failed with error %x\n", err);
}
deleteConsumer(consData);
return -1;
}
/* If we received an event, handle it based on its type
*
* Note that there's a lot of error checking we could choose to do for
* some of these events, like making sure that we only receive each
* event once for a given entry. But NvSciStream is expected to take
* care of all of that, even when the application makes a mistake.
* So we only check for things that don't trigger NvSciStream errors.
*/
int32_t rv = 1;
NvSciError status;
switch (event) {
/*
* Any event we don't explicitly handle is a fatal error
*/
default:
printf("Consumer received unknown event %x\n", event);
rv = -1;
break;
/*
* Error events should never occur with safety-certified drivers,
* and are provided only in non-safety builds for debugging
* purposes. Even then, they should only occur when something
* fundamental goes wrong, like the system running out of memory,
* or stack/heap corruption, or a bug in NvSci which should be
* reported to NVIDIA.
*/
case NvSciStreamEventType_Error:
err = NvSciStreamBlockErrorGet(consData->common.block, &status);
if (NvSciError_Success != err) {
printf("%s Failed to query the error event code %x\n",
consData->common.name, err);
} else {
printf("%s received error event: %x\n",
consData->common.name, status);
}
rv = -1;
break;
/*
* If told to disconnect, it means either the stream finished its
* business or some other block had a failure. We'll just do a
* clean up and return without an error.
*/
case NvSciStreamEventType_Disconnected:
printf("Consumer disconnected after receiving %d payloads\n",
consData->counter);
rv = 2;
break;
/*
* On connection, the consumer should initialize the appopriate engine(s)
* and obtain the necessary buffer and synchronization attribute lists
* for the desired use case.
*/
case NvSciStreamEventType_Connected:
/* Initialize CUDA access */
if (!handleConsumerInit(consData)) {
rv = -1;
}
/* Determine supported buffer attributes */
else if (!handleConsumerElemSupport(consData)) {
rv = -1;
}
/* Determined supported sync attributes */
else if (!handleConsumerSyncSupport(consData)) {
rv = -1;
}
/* Now that we're fully connected, set the wait time to infinite */
consData->common.waitTime = -1;
break;
/* Retrieve all element information from pool */
case NvSciStreamEventType_Elements:
if (!handleConsumerElemSetting(consData)) {
rv = -1;
}
break;
/* For a packet, set up an entry in the array */
case NvSciStreamEventType_PacketCreate:
if (!handleConsumerPacketCreate(consData)) {
rv = -1;
}
break;
/* Finish any setup related to packet resources */
case NvSciStreamEventType_PacketsComplete:
/* For this use case, nothing else to setup.
* Inform the NvSciStream that the consumer has imported all packets.
*/
err = NvSciStreamBlockSetupStatusSet(consData->common.block,
NvSciStreamSetup_PacketImport,
true);
if (NvSciError_Success != err) {
printf("Consumer failed (%x) to complete packet import\n", err);
rv = -1;
}
break;
/* Delete a packet - usually only relevant for non-safety applications */
case NvSciStreamEventType_PacketDelete:
handleConsumerPacketDelete(consData);
break;
case NvSciStreamEventType_WaiterAttr:
if (!handleConsumerSyncExport(consData)) {
rv = -1;
}
break;
/* Import producer sync objects for all elements */
case NvSciStreamEventType_SignalObj:
if (!handleConsumerSyncImport(consData)) {
rv = -1;
}
break;
/* All setup complete. Transition to runtime phase */
case NvSciStreamEventType_SetupComplete:
printf("Consumer setup completed\n");
break;
/* Processs payloads when packets arrive */
case NvSciStreamEventType_PacketReady:
if (!handleConsumerPayload(consData)) {
rv = -1;
}
break;
}
/* On failure or final event, clean up the block */
if ((rv < 0) || (1 < rv)) {
deleteConsumer(consData);
}
return rv;
}
/* Create and register a new consumer block */
int32_t createConsumer_Usecase1(
NvSciStreamBlock* consumer,
NvSciStreamBlock pool,
uint32_t index)
{
/*
* The index is ignored. It is provided to support use cases where
* there are multiple consumers that don't all do the same thing.
*/
(void)index;
/* Create a data structure to track the block's status */
ConsData* consData = (ConsData*)calloc(1, sizeof(ConsData));
if (NULL == consData) {
printf("Failed to allocate data structure for consumer\n");
return 0;
}
/* Save the name for debugging purposes */
strcpy(consData->common.name, "Consumer");
/* Wait time for initial connection event will be 60 seconds */
consData->common.waitTime = 60 * 1000000;
/* Create a pool block */
NvSciError err =
NvSciStreamConsumerCreate(pool, &consData->common.block);
if (NvSciError_Success != err) {
printf("Failed (%x) to create consumer block\n", err);
deleteConsumer(consData);
return 0;
}
if (opts.endInfo) {
/* Add endpoint information on consumer side.
* Application can specify user-defined info to help set up stream,
* which can be queried by other blocks after stream connection.
*/
char info[INFO_SIZE] = {0};
size_t infoSize =
snprintf(info, INFO_SIZE, "%s%d", "Consumer proc: ", getpid());
err = NvSciStreamBlockUserInfoSet(consData->common.block,
ENDINFO_NAME_PROC,
infoSize, info);
if (NvSciError_Success != err) {
printf("Failed (%x) to setup the consumer info\n", err);
deleteConsumer(consData);
return 0;
}
}
/* Register block with event handling mechanism */
if (!eventFuncs->reg(consData->common.block, consData, handleConsumer)) {
deleteConsumer(consData);
return 0;
}
*consumer = consData->common.block;
return 1;
}