diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index a5356e0e..bed46b65 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -5,3 +5,4 @@ obj-m += mods/ obj-m += nvsciipc/ obj-m += tegra-pcie-dma-test.o obj-m += bluedroid_pm.o +obj-m += nvscic2c-pcie/ diff --git a/drivers/misc/nvscic2c-pcie/Kconfig b/drivers/misc/nvscic2c-pcie/Kconfig new file mode 100644 index 00000000..7b6a5a90 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +config NVSCIC2C_PCIE + tristate "NVIDIA Chip-to-Chip between PCIe-RP and PCIe-EP" + depends on PCI && PCI_ENDPOINT + default n + help + This enables SoftwareCommunicationInterface for Host-to-Host + communication between PCIe Rootport and PCIe Endpoint. + If unsure, Please say N. diff --git a/drivers/misc/nvscic2c-pcie/Makefile b/drivers/misc/nvscic2c-pcie/Makefile new file mode 100644 index 00000000..550e18c4 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y += -Werror +ccflags-y += -DDYNAMIC_DEBUG_MODULE +ccflags-y += -I$(srctree.nvidia-oot)/drivers/misc/nvscic2c-pcie +#host1x-next and Tegra DRM are not moved to nvidia-oot include path yet. +ccflags-y += -I$(srctree.nvidia)/drivers/gpu/host1x/include +ccflags-y += -I$(srctree.nvidia)/drivers/gpu/drm/tegra/include + +ifdef CONFIG_PCIE_TEGRA194_EP +obj-m := nvscic2c-pcie-epc.o nvscic2c-pcie-epf.o +nvscic2c-pcie-epc-y := comm-channel.o dt.o endpoint.o epc/module.o iova-alloc.o iova-mngr.o pci-client.o stream-extensions.o vmap.o vmap-pin.o +nvscic2c-pcie-epf-y := comm-channel.o dt.o endpoint.o epf/module.o iova-alloc.o iova-mngr.o pci-client.o stream-extensions.o vmap.o vmap-pin.o +endif diff --git a/drivers/misc/nvscic2c-pcie/comm-channel.c b/drivers/misc/nvscic2c-pcie/comm-channel.c new file mode 100644 index 00000000..81d8b51e --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/comm-channel.c @@ -0,0 +1,889 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: comm-channel: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "comm-channel.h" +#include "common.h" +#include "module.h" +#include "pci-client.h" + +#define CACHE_ALIGN (64) + +/* Fifo size */ +/* + * This is wrong, but to have private communication channel functional at the + * earliest, we allocate large set of frames assuming all the available + * endpoints can share all possible export descriptors without having to block + * and wait for channel to become writeable. + * + * Despite this huge fifo size, if msg cannot be send, it either means remote + * is busy processing them quite slow (unlikely) or ill. In such a case, we + * shall return -EAGAIN for application to retry and application can bail-out + * after few retries. 
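+ *
+ * For reference, the values below give 1024 frames of 64 bytes each, i.e.
+ * roughly 64 KiB of fifo payload per direction plus one cache-aligned
+ * header, rounded up to PAGE_SIZE at allocation time.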
+ */ +#define COMM_CHANNEL_NFRAMES (1024) +#define COMM_CHANNEL_FRAME_SZ (64) + +/* fifo header.*/ +struct header { + u32 wr_count; + u32 rd_count; + u8 pad[CACHE_ALIGN - sizeof(u32) - sizeof(u32)]; +} __packed; + +/* kthread. */ +struct task_t { + struct task_struct *task; + wait_queue_head_t waitq; + struct completion shutdown_compl; + bool shutdown; + bool created; +}; + +/* Notification handling. */ +struct syncpt_t { + u32 id; + u32 threshold; + struct host1x_syncpt *sp; + + /* PCIe aperture for writes to peer syncpoint for same comm-channel. */ + struct pci_aper_t peer_mem; + + /* syncpoint physical address for stitching to PCIe BAR backing.*/ + size_t size; + phys_addr_t phy_addr; + + /* iova mapping of client choice.*/ + void *iova_block_h; + u64 iova; + bool mapped_iova; + + /* Lock to protect fences between callback and deinit. */ + struct mutex lock; + /* Fence to specific Threshold. */ + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + /* Work to notify and allocate new fence. */ + struct work_struct work; + void (*notifier)(void *data); + void *notifier_data; + bool fence_release; +}; + +struct fifo_t { + /* slot/frames for the comm-channel.*/ + u32 nframes; + u32 frame_sz; + + /* fifo operations.*/ + struct header *send_hdr; + struct header *recv_hdr; + struct header *local_hdr; + u8 *send; + u8 *recv; + u32 wr_pos; + u32 rd_pos; + /* serialize send operations.*/ + struct mutex send_lock; + + /* fifo physical pages and stitched to iova of client choice(recv).*/ + struct cpu_buff_t self_mem; + void *iova_block_h; + u64 iova; + bool mapped_iova; + + /* PCIe aperture for writes to peer comm fifo. */ + struct pci_aper_t peer_mem; +}; + +struct comm_channel_ctx_t { + /* data. */ + struct fifo_t fifo; + + /* Notification. */ + struct syncpt_t syncpt; + + /* receive message task.*/ + struct task_t r_task; + atomic_t recv_count; + + /* Callbacks registered for recv messages. */ + struct mutex cb_ops_lock; + struct callback_ops cb_ops[COMM_MSG_TYPE_MAXIMUM]; + + /* pci client handle.*/ + void *pci_client_h; + + /* nvscic2c-pcie DT node reference, used in getting syncpoint shim. */ + struct device_node *of_node; + struct platform_device *host1x_pdev; +}; + +static inline bool +can_send(struct fifo_t *fifo, int *ret) +{ + bool send = false; + u32 peer_toread = + (fifo->local_hdr->wr_count - fifo->recv_hdr->rd_count); + + if (peer_toread < fifo->nframes) { + /* space available - can send.*/ + send = true; + *ret = 0; + } else if (peer_toread == fifo->nframes) { + /* full. 
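+		 * every frame written is still awaiting the peer's read;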
client can try again (at the moment.)*/ + send = false; + *ret = -EAGAIN; // -ENOMEM; + } else if (peer_toread > fifo->nframes) { + /* erroneous.*/ + send = false; + *ret = -EOVERFLOW; + } + + return send; +} + +static inline bool +can_recv(struct fifo_t *fifo, int *ret) +{ + bool recv = false; + u32 toread = (fifo->recv_hdr->wr_count - fifo->local_hdr->rd_count); + + if (toread == 0) { + /* no frame available to read.*/ + recv = false; + *ret = -ENODATA; + } else if (toread <= fifo->nframes) { + /* frames available - can read.*/ + recv = true; + *ret = 0; + } else if (toread > fifo->nframes) { + /* erroneous.*/ + recv = false; + *ret = -EOVERFLOW; + } + + return recv; +} + +static int +send_msg(struct comm_channel_ctx_t *comm_ctx, struct comm_msg *msg) +{ + int ret = 0; + size_t size = 0; + void *from = NULL; + void __iomem *to = NULL; + struct fifo_t *fifo = NULL; + struct syncpt_t *syncpt = NULL; + enum peer_cpu_t peer_cpu = NVCPU_ORIN; + + fifo = &comm_ctx->fifo; + syncpt = &comm_ctx->syncpt; + + peer_cpu = pci_client_get_peer_cpu(comm_ctx->pci_client_h); + mutex_lock(&fifo->send_lock); + + /* if no space available, at the moment, client can try again. */ + if (!can_send(fifo, &ret)) { + mutex_unlock(&fifo->send_lock); + return ret; + } + + to = (void __iomem *)(fifo->send + (fifo->wr_pos * fifo->frame_sz)); + from = (void *)(msg); + size = sizeof(*msg); + memcpy_toio(to, from, size); + + fifo->local_hdr->wr_count++; + writel(fifo->local_hdr->wr_count, + (void __iomem *)(&fifo->send_hdr->wr_count)); + + if (peer_cpu == NVCPU_X86_64) { + /* comm-channel irq verctor always take from index 0 */ + ret = pci_client_raise_irq(comm_ctx->pci_client_h, PCI_EPC_IRQ_MSI, 0); + } else { + /* notify peer for each write.*/ + writel(0x1, syncpt->peer_mem.pva); + } + + fifo->wr_pos = fifo->wr_pos + 1; + if (fifo->wr_pos >= fifo->nframes) + fifo->wr_pos = 0; + + mutex_unlock(&fifo->send_lock); + + return ret; +} + +int +comm_channel_ctrl_msg_send(void *comm_channel_h, struct comm_msg *msg) +{ + struct comm_channel_ctx_t *comm_ctx = + (struct comm_channel_ctx_t *)comm_channel_h; + + if (WARN_ON(!comm_ctx || !msg)) + return -EINVAL; + + if (WARN_ON(msg->type != COMM_MSG_TYPE_BOOTSTRAP && + msg->type != COMM_MSG_TYPE_EDMA_RX_DESC_IOVA_RETURN && + msg->type != COMM_MSG_TYPE_SHUTDOWN && + msg->type != COMM_MSG_TYPE_LINK)) + return -EINVAL; + + /* + * this is a special ctrl message where the sender: @DRV_MODE_EPC + * or @DRV_MODE_EPF shares control information with peer. We do not + * check for nvscic2c-pcie link status as these messages can flow + * even when nvscic2c-pcie link status is down but possibly with + * PCIE physical link between PCIe RP SoC and PCIe EP SoC alive. 
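+	 * Callers must still be prepared for -EAGAIN or -EOVERFLOW from the
+	 * underlying fifo, exactly as for regular messages.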
+ */ + + return send_msg(comm_ctx, msg); +} + +int +comm_channel_msg_send(void *comm_channel_h, struct comm_msg *msg) +{ + enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN; + struct comm_channel_ctx_t *comm_ctx = + (struct comm_channel_ctx_t *)comm_channel_h; + + if (WARN_ON(!comm_ctx || !msg)) + return -EINVAL; + + if (WARN_ON(msg->type != COMM_MSG_TYPE_REGISTER && + msg->type != COMM_MSG_TYPE_UNREGISTER)) + return -EINVAL; + + link = pci_client_query_link_status(comm_ctx->pci_client_h); + + if (link != NVSCIC2C_PCIE_LINK_UP) + return -ENOLINK; + + return send_msg(comm_ctx, msg); +} + +static int +recv_taskfn(void *arg) +{ + int ret = 0; + struct comm_channel_ctx_t *comm_ctx = NULL; + struct comm_msg *msg = NULL; + struct task_t *task = NULL; + struct fifo_t *fifo = NULL; + struct callback_ops *cb_ops = NULL; + + comm_ctx = (struct comm_channel_ctx_t *)(arg); + task = &comm_ctx->r_task; + fifo = &comm_ctx->fifo; + + while (!task->shutdown) { + /* wait for notification from peer or shutdown. */ + wait_event_interruptible(task->waitq, + (atomic_read(&comm_ctx->recv_count) || + task->shutdown)); + /* task is exiting.*/ + if (task->shutdown) + continue; + + /* read all on single notify.*/ + atomic_dec(&comm_ctx->recv_count); + while (can_recv(fifo, &ret)) { + msg = (struct comm_msg *) + (fifo->recv + (fifo->rd_pos * fifo->frame_sz)); + + if (msg->type > COMM_MSG_TYPE_INVALID && + msg->type < COMM_MSG_TYPE_MAXIMUM) { + mutex_lock(&comm_ctx->cb_ops_lock); + cb_ops = &comm_ctx->cb_ops[msg->type]; + + if (cb_ops->callback) + cb_ops->callback + ((void *)msg, cb_ops->ctx); + mutex_unlock(&comm_ctx->cb_ops_lock); + } + + fifo->local_hdr->rd_count++; + + writel(fifo->local_hdr->rd_count, + (void __iomem *)(&fifo->send_hdr->rd_count)); + + /* do not noifty peer for space availability. */ + + fifo->rd_pos = fifo->rd_pos + 1; + if (fifo->rd_pos >= fifo->nframes) + fifo->rd_pos = 0; + } + + /* if nothing (left) to read, go back waiting. */ + continue; + } + + /* we do not use kthread_stop(), but wait on this.*/ + complete(&task->shutdown_compl); + return 0; +} + +/* Shall be invoked on expiry of syncpoint shim fence/trigger from remote. */ +static void +syncpt_callback(void *data) +{ + struct comm_channel_ctx_t *comm_ctx = NULL; + + if (WARN_ON(!data)) + return; + + comm_ctx = (struct comm_channel_ctx_t *)(data); + + /* kick r_task for processing this notification.*/ + atomic_inc(&comm_ctx->recv_count); + wake_up_interruptible_all(&comm_ctx->r_task.waitq); +} + +static void +host1x_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct syncpt_t *syncpt = container_of(cb, struct syncpt_t, fence_cb); + + schedule_work(&syncpt->work); +} + +static int +allocate_fence(struct syncpt_t *syncpt) +{ + int ret = 0; + struct dma_fence *fence = NULL; + + fence = host1x_fence_create(syncpt->sp, ++syncpt->threshold, false); + if (IS_ERR_OR_NULL(fence)) { + ret = PTR_ERR(fence); + pr_err("host1x_fence_create failed with: %d\n", ret); + return ret; + } + + mutex_lock(&syncpt->lock); + ret = dma_fence_add_callback(fence, &syncpt->fence_cb, host1x_cb_func); + if (ret != 0) { + /* If already expired. 
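+		 * dma_fence_add_callback() returns -ENOENT when the fence has
+		 * already signalled; in that case run the work directly, as the
+		 * callback will never fire.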
*/ + if (ret == -ENOENT) { + ret = 0; + schedule_work(&syncpt->work); + } + goto put_fence; + } + syncpt->fence = fence; + mutex_unlock(&syncpt->lock); + + return ret; + +put_fence: + dma_fence_put(fence); + mutex_unlock(&syncpt->lock); + return ret; +} + +static void +fence_do_work(struct work_struct *work) +{ + int ret = 0; + struct syncpt_t *syncpt = container_of(work, struct syncpt_t, work); + + if (syncpt->notifier) + syncpt->notifier(syncpt->notifier_data); + + mutex_lock(&syncpt->lock); + /* If deinit triggered, no need to proceed. */ + if (syncpt->fence_release) + return; + + if (syncpt->fence) { + dma_fence_put(syncpt->fence); + syncpt->fence = NULL; + } + mutex_unlock(&syncpt->lock); + + ret = allocate_fence(syncpt); + if (ret != 0) { + mutex_unlock(&syncpt->lock); + pr_err("allocate_fence failed with: %d\n", ret); + return; + } +} + +static int +start_msg_handling(struct comm_channel_ctx_t *comm_ctx) +{ + int ret = 0; + struct task_t *r_task = &comm_ctx->r_task; + struct syncpt_t *syncpt = &comm_ctx->syncpt; + + /* start the recv msg processing task.*/ + init_waitqueue_head(&r_task->waitq); + init_completion(&r_task->shutdown_compl); + r_task->shutdown = false; + r_task->task = kthread_run(recv_taskfn, comm_ctx, + "comm-channel-recv-task"); + if (IS_ERR_OR_NULL(r_task->task)) { + pr_err("Failed to create comm channel recv task\n"); + return PTR_ERR(r_task->task); + } + r_task->created = true; + + /* enable syncpt notifications handling. */ + mutex_init(&syncpt->lock); + syncpt->threshold = host1x_syncpt_read(syncpt->sp); + syncpt->notifier = syncpt_callback; + syncpt->notifier_data = comm_ctx; + INIT_WORK(&syncpt->work, fence_do_work); + syncpt->fence_release = false; + + ret = allocate_fence(syncpt); + if (ret != 0) { + pr_err("allocate_fence failed with: %d\n", ret); + return ret; + } + + return ret; +} + +static int +stop_msg_handling(struct comm_channel_ctx_t *comm_ctx) +{ + int ret = 0; + struct task_t *r_task = NULL; + struct syncpt_t *syncpt = NULL; + + if (!comm_ctx) + return ret; + + syncpt = &comm_ctx->syncpt; + r_task = &comm_ctx->r_task; + + if (r_task->created) { + /* Remove dma fence callback. */ + mutex_lock(&syncpt->lock); + syncpt->fence_release = true; + if (syncpt->fence) { + ret = dma_fence_remove_callback(syncpt->fence, + &syncpt->fence_cb); + if (ret) { + /* + * If dma_fence_remove_callback() returns true + * means callback is removed successfully. + * Cancel the fence to drop the refcount. + */ + host1x_fence_cancel(syncpt->fence); + } + dma_fence_put(syncpt->fence); + syncpt->fence = NULL; + } + mutex_unlock(&syncpt->lock); + cancel_work_sync(&syncpt->work); + mutex_destroy(&syncpt->lock); + + /* + * initiate stop. + * we do not use kthread_stop(), but wait on this. 
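+		 * recv_taskfn() observes r_task->shutdown, leaves its wait
+		 * loop and signals shutdown_compl before returning.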
+ */ + r_task->shutdown = true; + wake_up_interruptible(&r_task->waitq); + ret = wait_for_completion_interruptible(&r_task->shutdown_compl); + if (ret) + pr_err("Failed to wait for completion\n"); + + r_task->created = false; + } + + return ret; +} + +static void +free_syncpoint(struct comm_channel_ctx_t *comm_ctx) +{ + struct syncpt_t *syncpt = NULL; + + if (!comm_ctx) + return; + + syncpt = &comm_ctx->syncpt; + + if (syncpt->peer_mem.pva) { + iounmap(syncpt->peer_mem.pva); + syncpt->peer_mem.pva = NULL; + } + + if (syncpt->mapped_iova) { + pci_client_unmap_addr(comm_ctx->pci_client_h, + syncpt->iova, syncpt->size); + syncpt->mapped_iova = false; + } + + if (syncpt->iova_block_h) { + pci_client_free_iova(comm_ctx->pci_client_h, + &syncpt->iova_block_h); + syncpt->iova_block_h = NULL; + } + + if (syncpt->sp) { + host1x_syncpt_put(syncpt->sp); + syncpt->sp = NULL; + } +} + +static int +allocate_syncpoint(struct comm_channel_ctx_t *comm_ctx) +{ + int ret = 0; + int prot = 0; + struct host1x *host1x = NULL; + struct syncpt_t *syncpt = NULL; + size_t offsetof = 0x0; + + syncpt = &comm_ctx->syncpt; + + host1x = platform_get_drvdata(comm_ctx->host1x_pdev); + if (!host1x) { + pr_err("Host1x handle is null."); + return -EINVAL; + } + syncpt->sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, + "nvscic2c-pcie-comm-ch"); + if (IS_ERR_OR_NULL(syncpt->sp)) { + ret = -ENOMEM; + pr_err("Failed to reserve comm notify syncpt\n"); + goto err; + } + + syncpt->id = host1x_syncpt_id(syncpt->sp); + /* physical address of syncpoint shim. */ + syncpt->phy_addr = get_syncpt_shim_offset(syncpt->id); + syncpt->size = SP_MAP_SIZE; + + /* reserve iova with the iova manager.*/ + ret = pci_client_alloc_iova(comm_ctx->pci_client_h, syncpt->size, + &syncpt->iova, &offsetof, + &syncpt->iova_block_h); + if (ret) { + pr_err("Err reserving comm syncpt iova region of size: 0x%lx\n", + syncpt->size); + goto err; + } + + /* map the pages to the reserved iova. */ + prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE); + ret = pci_client_map_addr(comm_ctx->pci_client_h, syncpt->iova, + syncpt->phy_addr, syncpt->size, prot); + if (ret) { + pr_err("Err mapping comm SP physical addr to reserved iova\n"); + goto err; + } + syncpt->mapped_iova = true; + + pr_debug("mapped phy:0x%pa[p]+0x%lx to iova:0x%llx\n", + &syncpt->phy_addr, syncpt->size, syncpt->iova); + + /* + * get peer's aperture offset. Map tx (pcie aper for notif tx.) + * for peer's access of comm-syncpt, it is assumed offsets are + * same on both SoC. 
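+	 * A 4-byte write to this window is enough to increment the peer's
+	 * syncpoint (see the writel() in send_msg()).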
+ */ + syncpt->peer_mem.size = syncpt->size; + ret = pci_client_get_peer_aper(comm_ctx->pci_client_h, offsetof, + syncpt->peer_mem.size, + &syncpt->peer_mem.aper); + if (ret) { + pr_err("Failed to get comm peer's syncpt aperture\n"); + goto err; + } + syncpt->peer_mem.pva = ioremap(syncpt->peer_mem.aper, + syncpt->peer_mem.size); + if (!syncpt->peer_mem.pva) { + ret = -ENOMEM; + pr_err("Failed to ioremap comm peer's syncpt pcie aperture\n"); + goto err; + } + + return ret; +err: + free_syncpoint(comm_ctx); + return ret; +} + +static void +free_fifo_memory(struct comm_channel_ctx_t *comm_ctx) +{ + struct fifo_t *fifo = NULL; + + if (!comm_ctx) + return; + + fifo = &comm_ctx->fifo; + + if (fifo->local_hdr) { + kfree((void *)fifo->local_hdr); + fifo->local_hdr = NULL; + } + + if (fifo->peer_mem.pva) { + iounmap(fifo->peer_mem.pva); + fifo->peer_mem.pva = NULL; + } + + if (fifo->mapped_iova) { + pci_client_unmap_addr(comm_ctx->pci_client_h, + fifo->iova, fifo->self_mem.size); + fifo->mapped_iova = false; + } + + if (fifo->iova_block_h) { + pci_client_free_iova(comm_ctx->pci_client_h, + &fifo->iova_block_h); + fifo->iova_block_h = NULL; + } + + if (fifo->self_mem.pva) { + free_pages_exact(fifo->self_mem.pva, + fifo->self_mem.size); + fifo->self_mem.pva = NULL; + } + + mutex_destroy(&fifo->send_lock); +} + +static int +allocate_fifo_memory(struct comm_channel_ctx_t *comm_ctx) +{ + int ret = 0; + int prot = 0; + size_t offsetof = 0x0; + struct fifo_t *fifo = &comm_ctx->fifo; + + mutex_init(&fifo->send_lock); + + /* memory size includes frames and header.*/ + fifo->nframes = COMM_CHANNEL_NFRAMES; + fifo->frame_sz = COMM_CHANNEL_FRAME_SZ; + fifo->self_mem.size = (fifo->nframes * fifo->frame_sz); + fifo->self_mem.size += sizeof(struct header); + fifo->self_mem.size = ALIGN(fifo->self_mem.size, PAGE_SIZE); + fifo->self_mem.pva = alloc_pages_exact(fifo->self_mem.size, + (GFP_KERNEL | __GFP_ZERO)); + if (!fifo->self_mem.pva) { + pr_err("Error allocating fifo contiguous pages: (%lu)\n", + (fifo->self_mem.size >> PAGE_SHIFT)); + return -ENOMEM; + } + fifo->self_mem.phys_addr = + page_to_phys(virt_to_page(fifo->self_mem.pva)); + + /* reserve iova for stitching comm channel memory for peer access.*/ + ret = pci_client_alloc_iova(comm_ctx->pci_client_h, fifo->self_mem.size, + &fifo->iova, &offsetof, + &fifo->iova_block_h); + if (ret) { + pr_err("Failed reserving fifo iova region of size: 0x%lx\n", + fifo->self_mem.size); + goto err; + } + + /* map the pages to the reserved iova.*/ + prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE); + ret = pci_client_map_addr(comm_ctx->pci_client_h, fifo->iova, + fifo->self_mem.phys_addr, fifo->self_mem.size, + prot); + if (ret) { + pr_err("Failed to map comm fifo pages to reserved iova\n"); + goto err; + } + fifo->mapped_iova = true; + + pr_debug("comm fifo mapped page:0x%pa[p]+0x%lx to iova:0x%llx\n", + &fifo->self_mem.phys_addr, fifo->self_mem.size, fifo->iova); + + /* + * for peer's access of comm-fifo, it is assumed offsets are + * same on both SoC. 
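+	 * i.e. the iova offset reserved above is used as the offset into the
+	 * peer's aperture as well.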
+ */ + fifo->peer_mem.size = fifo->self_mem.size; + ret = pci_client_get_peer_aper(comm_ctx->pci_client_h, offsetof, + fifo->peer_mem.size, + &fifo->peer_mem.aper); + if (ret) { + pr_err("Failed to get comm peer's fifo aperture\n"); + goto err; + } + fifo->peer_mem.pva = ioremap(fifo->peer_mem.aper, fifo->peer_mem.size); + if (!fifo->peer_mem.pva) { + ret = -ENOMEM; + pr_err("Failed to ioremap peer's comm fifo aperture\n"); + goto err; + } + + /* allocate local header.*/ + fifo->local_hdr = kzalloc(sizeof(*fifo->local_hdr), GFP_KERNEL); + if (WARN_ON(!fifo->local_hdr)) { + ret = -ENOMEM; + goto err; + } + + fifo->recv_hdr = (struct header *)(fifo->self_mem.pva); + fifo->send_hdr = (__force struct header *)(fifo->peer_mem.pva); + fifo->recv = ((u8 *)fifo->recv_hdr + sizeof(struct header)); + fifo->send = ((u8 *)fifo->send_hdr + sizeof(struct header)); + + return ret; + +err: + free_fifo_memory(comm_ctx); + return ret; +} + +int +comm_channel_init(struct driver_ctx_t *drv_ctx, void **comm_channel_h) +{ + int ret = 0; + struct comm_channel_ctx_t *comm_ctx = NULL; + + if (WARN_ON(sizeof(struct comm_msg) > COMM_CHANNEL_FRAME_SZ)) + return -EINVAL; + + /* should not be an already instantiated. */ + if (WARN_ON(!drv_ctx || !comm_channel_h || *comm_channel_h)) + return -EINVAL; + + /* start by allocating the comm ctx.*/ + comm_ctx = kzalloc(sizeof(*comm_ctx), GFP_KERNEL); + if (WARN_ON(!comm_ctx)) + return -ENOMEM; + mutex_init(&comm_ctx->cb_ops_lock); + atomic_set(&comm_ctx->recv_count, 0); + + comm_ctx->pci_client_h = drv_ctx->pci_client_h; + comm_ctx->of_node = drv_ctx->drv_param.of_node; + comm_ctx->host1x_pdev = drv_ctx->drv_param.host1x_pdev; + + /* + * allocate fifo area, make it visible to peer. Assume same aperture + * for peer access too. + */ + ret = allocate_fifo_memory(comm_ctx); + if (ret) + goto err; + + /* + * allocate notification for comm-channel, Assume same aperture + * for peer access too. 
+ */ + ret = allocate_syncpoint(comm_ctx); + if (ret) + goto err; + + /* we can now wait for notifications/messages to be received.*/ + ret = start_msg_handling(comm_ctx); + if (ret) + goto err; + + *comm_channel_h = comm_ctx; + return ret; +err: + comm_channel_deinit((void **)&comm_ctx); + return ret; +} + +void +comm_channel_deinit(void **comm_channel_h) +{ + struct comm_channel_ctx_t *comm_ctx = + (struct comm_channel_ctx_t *)(*comm_channel_h); + if (!comm_ctx) + return; + + stop_msg_handling(comm_ctx); + free_syncpoint(comm_ctx); + free_fifo_memory(comm_ctx); + mutex_destroy(&comm_ctx->cb_ops_lock); + kfree(comm_ctx); + + *comm_channel_h = NULL; +} + +int +comm_channel_register_msg_cb(void *comm_channel_h, enum comm_msg_type type, + struct callback_ops *ops) +{ + int ret = 0; + struct callback_ops *cb_ops = NULL; + struct comm_channel_ctx_t *comm_ctx = + (struct comm_channel_ctx_t *)comm_channel_h; + + if (WARN_ON(!comm_ctx || !ops || !ops->callback)) + return -EINVAL; + + if (WARN_ON(type <= COMM_MSG_TYPE_INVALID || + type >= COMM_MSG_TYPE_MAXIMUM)) + return -EINVAL; + + mutex_lock(&comm_ctx->cb_ops_lock); + + cb_ops = &comm_ctx->cb_ops[type]; + if (cb_ops->callback) { + pr_err("Callback for msg type: (%u) is already taken\n", type); + ret = -EBUSY; + } else { + cb_ops->callback = ops->callback; + cb_ops->ctx = ops->ctx; + } + + mutex_unlock(&comm_ctx->cb_ops_lock); + return ret; +} + +int +comm_channel_unregister_msg_cb(void *comm_channel_h, enum comm_msg_type type) +{ + int ret = 0; + struct callback_ops *cb_ops = NULL; + struct comm_channel_ctx_t *comm_ctx = + (struct comm_channel_ctx_t *)comm_channel_h; + + if (WARN_ON(!comm_ctx)) + return -EINVAL; + + if (WARN_ON(type <= COMM_MSG_TYPE_INVALID || + type >= COMM_MSG_TYPE_MAXIMUM)) + return -EINVAL; + + mutex_lock(&comm_ctx->cb_ops_lock); + cb_ops = &comm_ctx->cb_ops[type]; + cb_ops->callback = NULL; + cb_ops->ctx = NULL; + mutex_unlock(&comm_ctx->cb_ops_lock); + + return ret; +} diff --git a/drivers/misc/nvscic2c-pcie/comm-channel.h b/drivers/misc/nvscic2c-pcie/comm-channel.h new file mode 100644 index 00000000..61d5ec19 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/comm-channel.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __COMM_CHANNEL_H__ +#define __COMM_CHANNEL_H__ + +#include + +#include "common.h" + +/* forward declaration. */ +struct driver_ctx_t; + +enum comm_msg_type { + /* invalid.*/ + COMM_MSG_TYPE_INVALID = 0, + + /* + * One time message from peer @DRV_MODE_EPC (PCIe RP) towards + * @DRV_MODE_EPF(PCIe EP) for boot-strap mechanism. 
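+	 * Its payload (struct comm_msg_bootstrap) carries the iova the PCIe EP
+	 * uses to configure its outbound translation, plus the peer CPU type.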
+ */ + COMM_MSG_TYPE_BOOTSTRAP, + + /* Link status shared between @DRV_MODE_EPC and @DRV_MODE_EPF.*/ + COMM_MSG_TYPE_LINK, + + /* Share/Register export object with peer.*/ + COMM_MSG_TYPE_REGISTER, + + /* Unregister exported object back with peer.*/ + COMM_MSG_TYPE_UNREGISTER, + + /* return edma rx descriptor iova to peer x86 */ + COMM_MSG_TYPE_EDMA_RX_DESC_IOVA_RETURN, + + /* + * One time message from peer @DRV_MODE_EPC (PCIe RP) towards + * @DRV_MODE_EPF(PCIe EP) for initiating shutdown. + */ + COMM_MSG_TYPE_SHUTDOWN, + + /* Maximum. */ + COMM_MSG_TYPE_MAXIMUM, +}; + +/* + * For @DRV_MODE_EPF(PCIe EP), it doesn't know the send area towards + * @DRV_MODE_EPC (PCIe RP - there is not BAR with PCIe RP). This is the first + * message and only once sent by @DRV_MODE_EPC towards @DRV_MODE_EPF for latter + * to configure it's outbound translation. + */ +struct comm_msg_bootstrap { + u64 iova; + enum peer_cpu_t peer_cpu; +}; + +/* to simply,only one channel c2c remote edma case */ +struct comm_msg_edma_rx_desc_iova { + dma_addr_t iova; +}; + +/* Link status shared between @DRV_MODE_EPC and @DRV_MODE_EPF. + * Possible use: @DRV_MODE_EPC sends bootstrap message + * to @DRV_MODE_EPF without setting it's own PCIe link = UP, therefore, + * on compeleting initialization, @DRV_MODE_EPF(once bootstrap msg + * is received) shall send link = up message to @DRV_MODE_EPC. + */ +struct comm_msg_link { + enum nvscic2c_pcie_link status; +}; + +/* + * Private channel communication message sent by NvSciC2cPcie consumer + * towards producer to register the exported object at the NvSciC2cPcie + * producer SoC. + */ +struct comm_msg_register { + u64 export_desc; + u64 iova; + size_t size; + size_t offsetof; +}; + +/* + * Private channel communication message sent by NvSciC2cPcie producer + * towards onsumer to unregister it's exported object. + */ +struct comm_msg_unregister { + u64 export_desc; + u64 iova; + size_t size; + size_t offsetof; +}; + +struct comm_msg { + enum comm_msg_type type; + union data { + struct comm_msg_bootstrap bootstrap; + struct comm_msg_link link; + struct comm_msg_register reg; + struct comm_msg_unregister unreg; + struct comm_msg_edma_rx_desc_iova edma_rx_desc_iova; + } u; +} __aligned(8); + +int +comm_channel_init(struct driver_ctx_t *drv_ctx, void **comm_channel_h); + +void +comm_channel_deinit(void **comm_channel_h); + +int +comm_channel_msg_send(void *comm_channel_h, struct comm_msg *msg); + +int +comm_channel_ctrl_msg_send(void *comm_channel_h, struct comm_msg *msg); + +int +comm_channel_register_msg_cb(void *comm_channel_h, enum comm_msg_type type, + struct callback_ops *ops); +int +comm_channel_unregister_msg_cb(void *comm_channel_h, enum comm_msg_type type); + +int +comm_channel_edma_rx_desc_iova_send(void *comm_channel_h, struct comm_msg *msg); + +#endif //__COMM_CHANNEL_H__ diff --git a/drivers/misc/nvscic2c-pcie/common.h b/drivers/misc/nvscic2c-pcie/common.h new file mode 100644 index 00000000..e1b1c40b --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/common.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include +#include + +#define MODULE_NAME "nvscic2c-pcie" +#define DRIVER_NAME_EPF "nvscic2c-pcie-epf" +#define DRIVER_NAME_EPC "nvscic2c-pcie-epc" + +/* STREAM_OBJ_TYPE. */ +#define STREAM_OBJ_TYPE_MEM (0) +#define STREAM_OBJ_TYPE_SYNC (1) + +/* + * This capped number shall be used to derive export descriptor, therefore any + * change should be evaluated thoroughly. + */ +#define MAX_STREAM_MEMOBJS (1024) + +/* + * This capped number shall be used to derive export descriptor, therefore any + * change should be evaluated thoroughly. + */ +#define MAX_STREAM_SYNCOBJS (1024) + +/* + * In a topology of interconnected Boards + SoCs. + * + * This capped number shall be used to derive export descriptor, therefore any + * change should be evaluated thoroughly. + */ +#define MAX_BOARDS (16) +#define MAX_SOCS (16) +#define MAX_PCIE_CNTRLRS (16) + +/* + * Maximum NvSciIpc INTER_CHHIP(NvSciC2cPcie) endpoints that can be supported + * for single pair of PCIe RP<>EP connection (referred just as 'connection' + * henceforth). We have specific customer need for a set of Eleven NvSciC2cPcie + * endpoints for single connection. + * + * This capped number shall be used to derive export descriptor, therefore any + * change should be evaluated thoroughly. + */ +#define MAX_ENDPOINTS (16) + +/* + * Each NvSciIpc INTER_CHIP(NvSciC2cPcie) endpoint shall require at least one + * distinct notification Id (MSI/MSI-X, GIC SPI or NvRmHost1xSyncpointShim). + * Also, these notification mechanisms: MSI/MSI-X, GIC SPI, SyncpointShim are + * limited on SoC or per connection (configurable via device-tree). + * + * Also, there is a private communication channel between the two ends of a + * single connection that need notification Ids for message passing. Assuming + * this private communication channel to be a Queue-Pair (Cmd, Resp), need + * at least 2 distinct notification Ids for it on a single connection. + */ +#define MIN_NUM_NOTIFY (MAX_ENDPOINTS + (2)) + +/* NvRmHost1xSyncpointShim have size: 64KB on Orin.*/ +#define SP_SIZE (0x10000) + +/* + * Represents SyncpointShimBase on all T234. + * Each syncpoint is offset at (syncpt_id * SP_SIZE) on SHIM_BASE. + */ +#define SHIM_BASE (0x60000000) +/* + * For our use-case, if only 4 bytes of NvRmHost1xSynpointShim aperture mapped + * to PCIe device, any writes of (SZ_4B) from remote is enough to increment + * the Syncpoint. Hence we only map 4KB/PAGE_SIZE instead of full 64KB. + */ +#define SP_MAP_SIZE (0x1000) + +/* With Tegra as PCIe function we can have only one PCIe function. */ +#define PCIE_VFNO (0) + +/* + * NvSciC2c supports three concurrent PCI RP<>EP connection. + * These are three PCI Function Device ID's to be configured in PCI header + * when Tegra acting as PCI Function to peer Tegra acting as PCI RP. + */ +#define PCI_DEVICE_ID_C2C_1 (0x22CB) +#define PCI_DEVICE_ID_C2C_2 (0x22CC) +#define PCI_DEVICE_ID_C2C_3 (0x22CD) + +/* + * For NvStreams extensions over NvSciC2cPcie, an endpoint is a producer on + * one SoC and a corresponding consumer on the remote SoC. The role + * classification cannot be deduced in KMD. + */ + +/* + * PCIe BAR aperture for Tx to/Rx from peer. 
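+ * @aper holds the physical aperture address (ioremap()'d into @pva for
+ * kernel CPU access, or remapped into user-space via mmap).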
+ */ +struct pci_aper_t { + /* physical Pcie aperture.*/ + phys_addr_t aper; + + /* process virtual address for CPU access.*/ + void __iomem *pva; + + /* size of the perture.*/ + size_t size; +}; + +/* + * DMA'able memory registered/exported to peer - + * either allocated by dma_buf API or physical pages pinned to + * pcie address space(dma_handle). + */ +struct dma_buff_t { + /* process virtual address for CPU access. */ + void *pva; + + /* iova(iommu=ON) or bus address/physical address for device access. */ + dma_addr_t dma_handle; + + /* physical address.*/ + u64 phys_addr; + + /* size of the memory allocated. */ + size_t size; +}; + +/* + * CPU-only accessible memory which is not PCIe aper or PCIe + * DMA'able memory. This shall contain information of memory + * allocated via kalloc()/likewise. + */ +struct cpu_buff_t { + /* process virtual address for CPU access. */ + void *pva; + + /* (va->pa) physical address. */ + u64 phys_addr; + + /* size of the memory allocated. */ + size_t size; +}; + +/* + * Callback options for user to register with occurrence of an event. + */ +struct callback_ops { + /* + * User callback to be invoked. + * @data: Event-type or likewise data. read-only for user. + * @ctx: user ctx returned as-is in the callback. + */ + void (*callback)(void *data, void *ctx); + + /* user context that shall be passed with @callback.*/ + void *ctx; +}; + +/* + * Node information. A combination of Board + SoC + PCIe controller + * should be unique within the PCIe controllers/SoCs/Boards interconnected + for NvSciC2cPcie. + */ +struct node_info_t { + u32 board_id; + u32 soc_id; + u32 cntrlr_id; +}; + +/* + * NvSciC2cPcie either works as EndpointClient module - client driver for + * remote PCIe EP (runs on the PCIe RP SoC) or as EndpointFunction module - + * PCIe EP function driver (runs on the PCIe EP SoC). + */ +enum drv_mode_t { + /* Invalid. */ + DRV_MODE_INVALID = 0, + + /* Driver module runs as EndpointClient driver.*/ + DRV_MODE_EPC, + + /* Drive module runs as EndpointFunction driver.*/ + DRV_MODE_EPF, + + /* Maximum.*/ + DRV_MODE_MAXIMUM, +}; + +/* + * NvSciC2cPcie the cpu on peer + */ +enum peer_cpu_t { + NVCPU_ORIN = 0, + NVCPU_X86_64, + NVCPU_MAXIMUM, +}; + +/* Returns aperture offset of syncpoint on SHIM_BASE. */ +static inline u64 get_syncpt_shim_offset(u32 id) +{ + return (SHIM_BASE + (id * SP_SIZE)); +} +#endif //__COMMON_H__ diff --git a/drivers/misc/nvscic2c-pcie/descriptor.h b/drivers/misc/nvscic2c-pcie/descriptor.h new file mode 100644 index 00000000..910cf59c --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/descriptor.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __DESCRIPTOR_H__ +#define __DESCRIPTOR_H__ + +#include + +#include "common.h" + +/* Magic code for descriptor.*/ +#define DESC_MAGIC_CODE_32BIT (0x69152734) + +/* + * Format of Export Descriptor (at the moment) + * 0xXXXXXXXXRRRREIII + * 32bit(XXXXXXXX00000000): Reserved. + * 04bit(00000000B0000000): Peer Board Id. 
+ * 04bit(000000000S000000): Peer SoC Id. + * 04bit(0000000000C00000): Peer PCIe Controller Id. + * 04bit(00000000000E0000): Endpoint Id. + * 04bit(000000000000X000): Reserved. + * 12bit(0000000000000III): Obj type(1bit) + Obj Id(11bits). + * (Bit 11 : ObjType - Mem/Sync) + * (Bit 0-10 : ObjId - Mem or Sync Obj Id) + * + * Board Id and SoC Id together can be a Node Id to allow for cases, where SoC + * on a single board: [0-63] and number of boards: [0-3]. Essentially uniquely + * identifying each SoC inter-connected within or across the boards. + */ + +/* + * Topology can have a: + * A or a Set of boards + * - (Assumed [0, 15]). + * Each Board can have a or a set of SoC(s) + * - ID : [0, 15]. + * Each SoC can have a or a set of PCIe controllers either in RP or EP mode. + * - ID: [0, 15]. + * Each Controller can have a or a set of NvSciIpc INTER_CHIP endpoints. + * - ID: [0, 15]. + * Each NvSciIpc INTER_CHIP can export either a Mem object or Sync object + * - STREAM_OBJ_TYPE_MEM or STREAM_OBJ_TYPE_SYNC + * - Type: [0, 1]. + * Each NvSciIpc INTER_CHIP can export a set of either Mem or Sync objects. + * - ID: [0, 2047]. + */ +struct descriptor_bit_t { + u64 reserved1 : 32; + u64 board_id : 4; + u64 soc_id : 4; + u64 cntrlr_id : 4; + u64 endpoint_id : 4; + u64 reserved2 : 4; + u64 handle_type : 1; + u64 handle_id : 11; +}; + +/* bit-field manipulation. */ +union descriptor_t { + u64 value; + struct descriptor_bit_t bit; +}; + +/* Generate a descriptor (auth token) */ +static inline u64 +gen_desc(u32 peer_board_id, u32 peer_soc_id, u32 peer_cntrlr_id, u32 ep_id, + u32 handle_type, s32 handle_id) +{ + union descriptor_t desc; + + desc.bit.reserved1 = DESC_MAGIC_CODE_32BIT; + desc.bit.board_id = peer_board_id; + desc.bit.soc_id = peer_soc_id; + desc.bit.cntrlr_id = peer_cntrlr_id; + desc.bit.endpoint_id = ep_id; + desc.bit.handle_type = handle_type; + desc.bit.handle_id = handle_id; + + return desc.value; +} + +/* Validate a descriptor (auth token) */ +static inline int +validate_desc(u64 in_desc, u32 local_board_id, u32 local_soc_id, + u32 local_cntrlr_id, u32 ep_id) +{ + int ret = 0; + union descriptor_t desc; + + desc.value = in_desc; + if (desc.bit.reserved1 != DESC_MAGIC_CODE_32BIT || + desc.bit.board_id != local_board_id || + desc.bit.soc_id != local_soc_id || + desc.bit.cntrlr_id != local_cntrlr_id || + desc.bit.endpoint_id != ep_id) { + ret = -EINVAL; + } + + return ret; +} + +/* return handle type embedded in the descriptor (auth token) */ +static inline u32 +get_handle_type_from_desc(u64 in_desc) +{ + union descriptor_t desc; + + desc.value = in_desc; + return (u32)desc.bit.handle_type; +} + +/* + * Board Id, SoC Id, PCIe Controller Id should not be beyond 16 [0-15] - We have + * reserved 4b each for boardId to generate export descriptor. + */ +#if MAX_BOARDS > (0xF + 1) + #error MAX_BOARDS assumed to be less-than or equal to (16) +#endif +#if MAX_SOCS > (0xF + 1) + #error MAX_SOCS assumed to be less-than or equal to (16) +#endif +#if MAX_PCIE_CNTRLRS > (0xF + 1) + #error MAX_PCIE_CNTRLRS assumed to be less-than or equal to (16) +#endif + +/* + * Endpoints should not be beyond 16 [0-15] - We have reserved 4b for + * endpoint Id to generate export descriptor (although we could use + * up the reserved2 if needed). + */ +#if MAX_ENDPOINTS > (0xF + 1) + #error MAX_ENDPOINTS to be less than or equal to (16) +#endif + +/* + * Memory or Sync object indicator in descriptor should not be beyond 1 [0-1]. + * The value must be less than 1 as the descriptor accounts just 1b (1bit). 
+ */ +#if STREAM_OBJ_TYPE_MEM > (0x1) + #error STREAM_OBJ_TYPE_MEM to be less-than or equal-to (1) +#endif +#if STREAM_OBJ_TYPE_SYNC > (0x1) + #error STREAM_OBJ_TYPE_SYNC to be less-than or equal-to (1) +#endif + +/* + * Mem objects should not be beyond 2048 [0-2047] - We have reserved 11b for + * Obj Id to generate export descriptor. + */ +#if MAX_STREAM_MEMOBJS > (0x7FF + 1) + #error MAX_STREAM_MEMOBJS to be less than or equal to (2048) +#endif + +/* + * Sync objects should not be beyond 2048 [0-2047] - We have reserved 11b for + * Obj Id to generate export descriptor. + */ +#if MAX_STREAM_SYNCOBJS > (0x7FF + 1) + #error MAX_STREAM_SYNCOBJS to be less than or equal to (2048) +#endif +#endif //__DESCRIPTOR_H__ diff --git a/drivers/misc/nvscic2c-pcie/dt.c b/drivers/misc/nvscic2c-pcie/dt.c new file mode 100644 index 00000000..ed406f09 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/dt.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: dt: " fmt + +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "module.h" + +#define COMPATIBLE_EPC_PROP_VAL ("nvidia,tegra-nvscic2c-pcie-epc") +#define COMPATIBLE_EPF_PROP_VAL ("nvidia,tegra-nvscic2c-pcie-epf") +#define HOST1X_PHANDLE_PROP_NAME ("nvidia,host1x") +#define EDMA_PHANDLE_PROP_NAME ("nvidia,pcie-edma") +#define PCI_DEV_ID_PROP_NAME ("nvidia,pci-dev-id") +#define BAR_WIN_SZ_PROP_NAME ("nvidia,bar-win-size") +#define BOARD_ID_PROP_NAME ("nvidia,board-id") +#define SOC_ID_PROP_NAME ("nvidia,soc-id") +#define CNTRLR_ID_PROP_NAME ("nvidia,cntrlr-id") +#define ENDPOINT_DB_PROP_NAME ("nvidia,endpoint-db") +#define MAX_PROP_LEN (1024) +#define FRAME_SZ_ALIGN (64) + +#define MAX_FRAME_SZ (SZ_32K) +#define MAX_NFRAMES (64) +#define MIN_BAR_WIN_SZ (SZ_64M) + +/* + * Debug only. + */ +static void +dt_print(struct driver_param_t *drv_param) +{ + u32 i = 0; + struct node_info_t *local_node = &drv_param->local_node; + struct node_info_t *peer_node = &drv_param->peer_node; + + pr_debug("dt parsing leads to:\n"); + pr_debug("\tdriver mode = (%s)\n", + ((drv_param->drv_mode == DRV_MODE_EPC) ? 
("epc") : ("epf"))); + pr_debug("\tpci dev id = 0x%x\n", drv_param->pci_dev_id); + pr_debug("\tNode information\n"); + pr_debug("\t\tlocal board id = %u\n", local_node->board_id); + pr_debug("\t\tpeer board id = %u\n", peer_node->board_id); + pr_debug("\t\tlocal soc id = %u\n", local_node->soc_id); + pr_debug("\t\tpeer soc id = %u\n", peer_node->soc_id); + pr_debug("\t\tlocal pcie cntrlr id = %u\n", local_node->cntrlr_id); + pr_debug("\t\tpeer pcie cntrlr id = %u\n", peer_node->cntrlr_id); + if (drv_param->drv_mode == DRV_MODE_EPF) + pr_debug("\tbar win size = 0x%x\n", drv_param->bar_win_size); + pr_debug("\ttotal endpoints = (%u)\n", drv_param->nr_endpoint); + for (i = 0; i < drv_param->nr_endpoint; i++) { + struct endpoint_prop_t *prop = NULL; + + prop = &drv_param->endpoint_props[i]; + pr_debug("\t\t(%s)::\n", prop->name); + pr_debug("\t\t\tnframes = (%02u) frame_size=(%08u)", + prop->nframes, prop->frame_sz); + } + pr_debug("dt parsing ends\n"); +} + +/* + * helper function to tokenize the string with caller provided + * delimiter. + */ +static char * +tokenize(char **input, const char *delim) +{ + /* skipping args check - internal api.*/ + + char *token = NULL; + + token = strsep(input, delim); + if (!token) { + pr_err("Error parsing endpoint name\n"); + } else { + /* remove any whitespaces. */ + token = strim(token); + if (!token) + pr_err("Error trimming endpoint name\n"); + } + + return token; +} + +/* + * helper function to tokenize the string with caller provided + * delimiter and provide the sting->uint8_t value. + * + * @param input is an in,out parameter. + * + */ +static int +tokenize_u8(char **input, const char *delim, + u32 base, u8 *value) +{ + int ret = 0; + char *token = NULL; + + /* skipping args check - internal api.*/ + + token = tokenize(input, delim); + if (!token) + ret = -ENODATA; + else + ret = kstrtou8(token, base, value); + + return ret; +} + +/* + * helper function to tokenize the string with caller provided + * delimiter and provide the sting->u32 value. + * + * @param input is an in,out parameter. 
+ * + */ +static int +tokenize_u32(char **input, const char *delim, + u32 base, u32 *value) +{ + int ret = 0; + char *token = NULL; + + /* skipping args check - internal api.*/ + + token = tokenize(input, delim); + if (!token) + ret = -ENODATA; + else + ret = kstrtou32(token, base, value); + + return ret; +} + +/* find a compatible node carrying the pci_dev_id.*/ +static struct device_node* +find_compatible_node(const char *compatible, u32 pci_dev_id) +{ + int ret = 0; + u32 ret_id = 0; + struct device_node *dn = NULL; + struct device_node *dn_found = NULL; + + /* look all device nodes with matching compatible and pci-dev-id.*/ + while ((dn = of_find_compatible_node(dn, NULL, compatible)) != NULL) { + if (of_device_is_available(dn) == false) + continue; + + ret = of_property_read_u32(dn, PCI_DEV_ID_PROP_NAME, &ret_id); + if (ret < 0) { + pr_err("Failed to read: (%s) from device node: (%s)\n", + PCI_DEV_ID_PROP_NAME, dn->name); + of_node_put(dn); + goto err; + } + + if (ret_id == pci_dev_id) { + if (dn_found) { + ret = -EINVAL; + pr_err("pci-dev-id: (0x%x) first repeated in:(%s)\n", + ret_id, dn->name); + of_node_put(dn); + goto err; + } else { + dn_found = dn; + } + } + } + + if (!dn_found) { + ret = -EINVAL; + pr_err("Matching pci-dev-id: (0x%x) not found\n", pci_dev_id); + goto err; + } + + return dn_found; + +err: + return ERR_PTR(ret); +} + +/* Parse the host1x phandle and create host1x pdev.*/ +static int +parse_host1x_phandle(struct driver_param_t *drv_param) +{ + int ret = 0; + struct device_node *np = NULL; + + np = drv_param->pdev->dev.of_node; + + drv_param->host1x_np = + of_parse_phandle(np, HOST1X_PHANDLE_PROP_NAME, 0); + if (!drv_param->host1x_np) { + ret = -EINVAL; + pr_err("Error parsing host1x phandle property: (%s)\n", + HOST1X_PHANDLE_PROP_NAME); + } else { + drv_param->host1x_pdev = + of_find_device_by_node(drv_param->host1x_np); + if (!drv_param->host1x_pdev) { + ret = -ENODEV; + pr_err("Host1x device not available\n"); + } + } + + return ret; +} + +/* Parse the pcie-edma phandle.*/ +static int +parse_edma_phandle(struct driver_param_t *drv_param) +{ + int ret = 0; + struct device_node *np = NULL; + + np = drv_param->pdev->dev.of_node; + + drv_param->edma_np = of_parse_phandle(np, EDMA_PHANDLE_PROP_NAME, 0); + if (!drv_param->edma_np) { + ret = -EINVAL; + pr_err("Error parsing pcie-edma phandle property: (%s)\n", + EDMA_PHANDLE_PROP_NAME); + } + + return ret; +} + +/* Parse the pci device id.*/ +static int +parse_pci_dev_id(struct driver_param_t *drv_param) +{ + int ret = 0; + struct device_node *np = NULL; + + np = drv_param->pdev->dev.of_node; + + ret = of_property_read_u32(np, PCI_DEV_ID_PROP_NAME, + &drv_param->pci_dev_id); + if (ret) { + pr_err("Error parsing pci dev id prop:(%s)\n", + PCI_DEV_ID_PROP_NAME); + goto err; + } + + /* validate.*/ + if (drv_param->pci_dev_id != PCI_DEVICE_ID_C2C_1 && + drv_param->pci_dev_id != PCI_DEVICE_ID_C2C_2 && + drv_param->pci_dev_id != PCI_DEVICE_ID_C2C_3) { + pr_err("Invalid value for property: (%s)\n", + PCI_DEV_ID_PROP_NAME); + goto err; + } + +err: + return ret; +} + +static int +validate_node_information(struct driver_param_t *drv_param) +{ + struct node_info_t *local_node = NULL; + struct node_info_t *peer_node = NULL; + + local_node = &drv_param->local_node; + peer_node = &drv_param->peer_node; + + if (local_node->board_id >= MAX_BOARDS || + peer_node->board_id >= MAX_BOARDS) { + pr_err("Board Ids must be in the range [0, %u]\n", MAX_BOARDS); + return -EINVAL; + } + if (local_node->soc_id >= MAX_SOCS || + peer_node->soc_id 
>= MAX_SOCS) { + pr_err("SoC Ids must be in the range [0, %u]\n", MAX_SOCS); + return -EINVAL; + } + if (local_node->cntrlr_id >= MAX_PCIE_CNTRLRS || + peer_node->cntrlr_id >= MAX_PCIE_CNTRLRS) { + pr_err("PCIe controller Ids must be in the range [0, %u]\n", + MAX_PCIE_CNTRLRS); + return -EINVAL; + } + + /* + * From the node information, we must have either + * one of the three properties different between + * local and peer. + * Same board, same SoC, different controller + * Same board, different SoC, same controller + * likewise. + * + * Essentially the tuple of board+soc+cntrlr shouldn't + * be same for local and peer. + */ + if (local_node->board_id == peer_node->board_id && + local_node->soc_id == peer_node->soc_id && + local_node->cntrlr_id == peer_node->cntrlr_id) + return -EINVAL; + + return 0; +} + +/* Parse the node information: board, soc, controller information.*/ +static int +parse_node_info(struct driver_param_t *drv_param) +{ + int ret = 0; + struct device_node *np = NULL; + struct node_info_t *local_node = NULL; + struct node_info_t *peer_node = NULL; + + np = drv_param->pdev->dev.of_node; + local_node = &drv_param->local_node; + peer_node = &drv_param->peer_node; + + /* board-id: local and peer.*/ + ret = of_property_read_u32_index(np, BOARD_ID_PROP_NAME, 0, + &local_node->board_id); + if (ret == 0) { + ret = of_property_read_u32_index(np, BOARD_ID_PROP_NAME, 1, + &peer_node->board_id); + } + if (ret) { + pr_err("Error parsing board id prop:(%s) information\n", + BOARD_ID_PROP_NAME); + goto err; + } + + /* soc-id: local and peer.*/ + ret = of_property_read_u32_index(np, SOC_ID_PROP_NAME, 0, + &local_node->soc_id); + if (ret == 0) { + ret = of_property_read_u32_index(np, SOC_ID_PROP_NAME, 1, + &peer_node->soc_id); + } + if (ret) { + pr_err("Error parsing soc id prop:(%s) information\n", + SOC_ID_PROP_NAME); + goto err; + } + + /* pcie controller-id: local and peer.*/ + ret = of_property_read_u32_index(np, CNTRLR_ID_PROP_NAME, 0, + &local_node->cntrlr_id); + if (ret == 0) { + ret = of_property_read_u32_index(np, CNTRLR_ID_PROP_NAME, 1, + &peer_node->cntrlr_id); + } + if (ret) { + pr_err("Error parsing pcie controller id prop:(%s) information\n", + CNTRLR_ID_PROP_NAME); + goto err; + } + + ret = validate_node_information(drv_param); + if (ret) { + pr_err("Node information for board:soc:cntrlr is not sane\n"); + goto err; + } + +err: + return ret; +} + +/* Parse the bar-window-size.*/ +static int +parse_bar_win_size(struct driver_param_t *drv_param) +{ + int ret = 0; + struct device_node *np = NULL; + + np = drv_param->pdev->dev.of_node; + + /* bar-win-size should be checked only when running as epf.*/ + ret = of_property_read_u32(np, BAR_WIN_SZ_PROP_NAME, + &drv_param->bar_win_size); + if (drv_param->drv_mode == DRV_MODE_EPF) { + if (ret) { + ret = -EINVAL; + pr_err("Error parsing bar window size prop:(%s)\n", + BAR_WIN_SZ_PROP_NAME); + } + } else { + /* success is not expected for EPC.*/ + if (ret == 0) { + ret = -EINVAL; + pr_err("Property (%s): must be present only with (%s)\n", + BAR_WIN_SZ_PROP_NAME, COMPATIBLE_EPF_PROP_VAL); + goto err; + } + /* proceed, as error is expected with EPC (node absent).*/ + ret = 0; + goto err; + } + + /* validate. 
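+	 * bar-win-size must be non-zero, a power of two and no smaller than
+	 * MIN_BAR_WIN_SZ.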
*/ + if (!drv_param->bar_win_size) { + ret = -EINVAL; + pr_err("Invalid BAR window size: (%u)\n", + drv_param->bar_win_size); + goto err; + } + if (drv_param->bar_win_size & (drv_param->bar_win_size - 1)) { + ret = -EINVAL; + pr_err("BAR window size: (%u) not a power of 2\n", + drv_param->bar_win_size); + goto err; + } + if (drv_param->bar_win_size < MIN_BAR_WIN_SZ) { + ret = -EINVAL; + pr_err("BAR window size: (%u) less than minimum: (%u)\n", + drv_param->bar_win_size, MIN_BAR_WIN_SZ); + goto err; + } +err: + return ret; +} + +/* + * helper function to validate per-endpoint parameters: + * nframes and frame_size primarily. + * + * Add more when required (probably crypto, eDMA, etc.) + */ +static int +validate_endpoint_prop(struct endpoint_prop_t *prop) +{ + int ret = 0; + + /* skipping args check - internal api.*/ + + if ((prop->name[0] == '\0')) { + ret = -EINVAL; + pr_err("Endpoint must have a name\n"); + } else if (prop->nframes == 0) { + ret = -EINVAL; + pr_err("(%s): Invalid number of frames\n", prop->name); + } else if (prop->frame_sz == 0) { + ret = -EINVAL; + pr_err("(%s): Invalid frame size\n", prop->name); + } else if ((prop->frame_sz & (FRAME_SZ_ALIGN - 1)) != 0) { + ret = -EINVAL; + pr_err("(%s): Frame size unaligned to (%u)\n", + prop->name, FRAME_SZ_ALIGN); + } else if (prop->frame_sz > MAX_FRAME_SZ) { + ret = -EINVAL; + pr_err("(%s): Frame size greater than: (%u)\n", + prop->name, (MAX_FRAME_SZ)); + } else if (prop->nframes > MAX_NFRAMES) { + ret = -EINVAL; + pr_err("(%s): Number of frames greater than: (%u)\n", + prop->name, (MAX_NFRAMES)); + } + + return ret; +} + +/* + * Parse all the endpoint information available in DT property + * of nvscic2c-pcie dt node. + */ +static int +parse_endpoint_db(struct driver_param_t *drv_param) +{ + int ret = 0; + u8 nr_endpoint = 0; + struct device_node *np = NULL; + + np = drv_param->pdev->dev.of_node; + + ret = of_property_count_strings(np, ENDPOINT_DB_PROP_NAME); + if (ret < 0) { + pr_err("Failed to query endpoint count from property: (%s)\n", + ENDPOINT_DB_PROP_NAME); + return -EFAULT; + } + nr_endpoint = ret; + + if (nr_endpoint == 0) { + ret = -EINVAL; + pr_err("No endpoint information in property: (%s)\n", + ENDPOINT_DB_PROP_NAME); + goto err; + } else if (nr_endpoint > MAX_ENDPOINTS) { + ret = -EINVAL; + pr_err("Invalid endpoint count:(%u) from property: (%s)\n", + nr_endpoint, ENDPOINT_DB_PROP_NAME); + goto err; + + } else { + u8 i = 0; + char *inp = NULL; + u32 base = 10; + const char *entry = NULL; + struct property *prop = NULL; + char entry_dup[MAX_PROP_LEN] = {0}; + + /* for each endpoint entry in endpointdb.*/ + of_property_for_each_string(np, ENDPOINT_DB_PROP_NAME, + prop, entry) { + char *name = NULL; + struct endpoint_prop_t *ep_prop = + &drv_param->endpoint_props[i]; + + /* + * per endpoint entry in endpointdb is longer than + * expected. 
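+			 * Entries are of the form
+			 * "<name>, <nframes>, <frame_sz>", e.g.
+			 * "ipc_ep_0, 16, 4096" (illustrative values only).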
+ */ + if (strlen(entry) > (MAX_PROP_LEN - 1)) { + ret = -EINVAL; + pr_err("Endpoint entry invalid\n"); + break; + } + memcpy(entry_dup, entry, (strlen(entry))); + inp = &entry_dup[0]; + + /* parse endpoint name.*/ + name = tokenize(&inp, ","); + if (!name) { + ret = -EFAULT; + pr_err("Error parsing endpoint name\n"); + break; + } + if (strlen(name) > (NAME_MAX - 1)) { + ret = -EINVAL; + pr_err("Endpoint name: (%s) long, max char:(%u)\n", + name, (NAME_MAX - 1)); + break; + } + strcpy(ep_prop->name, name); + + /* parse number of frames.*/ + ret = tokenize_u8(&inp, ",", base, &ep_prop->nframes); + if (ret) { + pr_err("Error parsing token nframes\n"); + break; + } + + /* parse size of each frame.*/ + ret = tokenize_u32(&inp, ",", base, &ep_prop->frame_sz); + if (ret) { + pr_err("Error parsing token frame_sz\n"); + break; + } + + /* validate some basic properties of endpoint.*/ + ret = validate_endpoint_prop(ep_prop); + if (ret) { + pr_err("(%s): endpoint has invalid properties\n", + ep_prop->name); + break; + } + + /* all okay, assign the id.*/ + ep_prop->id = i; + i++; + } + } + + /* all okay.*/ + drv_param->nr_endpoint = nr_endpoint; + +err: + return ret; +} + +/* + * Look-up device tree node for the compatible string. Check for the + * pci-dev-id within the compatible node, if more than one such node found also + * return error. + */ +int +dt_parse(u32 pci_dev_id, enum drv_mode_t drv_mode, + struct driver_param_t *drv_param) +{ + int ret = 0; + char *compatible = NULL; + struct device_node *dn = NULL; + + if (WARN_ON(!pci_dev_id)) + return -EINVAL; + + if (WARN_ON(!drv_param)) + return -EINVAL; + + if (drv_mode == DRV_MODE_EPC) + compatible = COMPATIBLE_EPC_PROP_VAL; + else if (drv_mode == DRV_MODE_EPF) + compatible = COMPATIBLE_EPF_PROP_VAL; + else + return -EINVAL; + + dn = find_compatible_node(compatible, pci_dev_id); + if (IS_ERR_OR_NULL(dn)) { + ret = -EINVAL; + goto err; + } + + memset(drv_param, 0x0, sizeof(*drv_param)); + drv_param->drv_mode = drv_mode; + + /* dn may not have refcount, by doing this we exlpicitly have one.*/ + drv_param->pdev = of_find_device_by_node(dn); + if (!drv_param->pdev) { + pr_err("Failed to find platform device for: (0x%x)\n", + pci_dev_id); + goto err; + } + drv_param->of_node = drv_param->pdev->dev.of_node; + + ret = parse_host1x_phandle(drv_param); + if (ret) + goto err; + + ret = parse_edma_phandle(drv_param); + if (ret) + goto err; + + ret = parse_pci_dev_id(drv_param); + if (ret) + goto err; + + ret = parse_node_info(drv_param); + if (ret) + goto err; + + ret = parse_bar_win_size(drv_param); + if (ret) + goto err; + + ret = parse_endpoint_db(drv_param); + if (ret) + goto err; + + /* all okay.*/ + dt_print(drv_param); + return ret; +err: + dt_release(drv_param); + return ret; +} + +/* + * to free-up any memory and decrement ref_count of device nodes + * accessed. 
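+ * Safe to call with a partially initialised driver_param_t; every reference
+ * is checked before being dropped.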
+ */ +int +dt_release(struct driver_param_t *drv_param) +{ + int ret = 0; + + if (!drv_param) + return ret; + + if (drv_param->host1x_pdev) { + platform_device_put(drv_param->host1x_pdev); + drv_param->host1x_pdev = NULL; + } + if (drv_param->host1x_np) { + of_node_put(drv_param->host1x_np); + drv_param->host1x_np = NULL; + } + if (drv_param->edma_np) { + of_node_put(drv_param->edma_np); + drv_param->edma_np = NULL; + } + if (drv_param->pdev) { + platform_device_put(drv_param->pdev); + drv_param->pdev = NULL; + } + return ret; +} diff --git a/drivers/misc/nvscic2c-pcie/endpoint.c b/drivers/misc/nvscic2c-pcie/endpoint.c new file mode 100644 index 00000000..596a166a --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/endpoint.c @@ -0,0 +1,1214 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: endpoint: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common.h" +#include "endpoint.h" +#include "module.h" +#include "pci-client.h" +#include "stream-extensions.h" + +#define PCIE_STATUS_CHANGE_ACK_TIMEOUT (2000) + +/* + * Masked offsets to return to user, allowing them to mmap + * different memory segments of endpoints in user-space. + */ +enum mem_mmap_type { + /* Invalid.*/ + MEM_MMAP_INVALID = 0, + /* Map Peer PCIe aperture: For Tx across PCIe.*/ + PEER_MEM_MMAP, + /* Map Self PCIe shared memory: For Rx across PCIe.*/ + SELF_MEM_MMAP, + /* Map Link memory segment to query link status with Peer.*/ + LINK_MEM_MMAP, + /* Maximum. */ + MEM_MAX_MMAP, +}; + +/* syncpoint handling. */ +struct syncpt_t { + u32 id; + u32 threshold; + struct host1x_syncpt *sp; + + /* PCIe aperture for writes to peer syncpoint for same the endpoint. */ + struct pci_aper_t peer_mem; + + /* syncpoint physical address for stritching to PCIe BAR backing.*/ + size_t size; + phys_addr_t phy_addr; + + /* for mapping above physical pages to iova of client choice.*/ + void *iova_block_h; + u64 iova; + bool mapped_iova; + + bool host1x_cb_set; + /* Lock to protect fences between callback and deinit. */ + struct mutex lock; + /* Fence to specific Threshold. */ + struct dma_fence *fence; + struct dma_fence_cb fence_cb; + /* Work to notify and allocate new fence. */ + struct work_struct work; + void (*notifier)(void *data); + void *notifier_data; + bool fence_release; +}; + +/* private data structure for each endpoint. */ +struct endpoint_t { + /* properties / attributes of this endpoint.*/ + char name[NAME_MAX]; + + /* char device management.*/ + u32 minor; + dev_t dev; + struct cdev cdev; + struct device *device; + + /* slot/frames this endpoint is divided into honoring alignment.*/ + u32 nframes; + u32 frame_sz; + + /* allocated physical memory info for mmap.*/ + struct cpu_buff_t self_mem; + + /* mapping physical pages to iova of client choice.*/ + void *iova_block_h; + u64 iova; + bool mapped_iova; + + /* PCIe aperture for writes to peer over pcie. 
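+	 * Exposed to user-space through the PEER_MEM_MMAP offset.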
*/ + struct pci_aper_t peer_mem; + + /* poll/notifications.*/ + wait_queue_head_t poll_waitq; + + /* syncpoint shim for notifications (rx). */ + struct syncpt_t syncpt; + + /* msi irq to x86 RP */ + u16 msi_irq; + + /* book-keeping of peer notifications.*/ + atomic_t dataevent_count; + + /* book-keeping of PCIe link event.*/ + atomic_t linkevent_count; + u32 linkevent_id; + + /* propagate events when endpoint was initialized.*/ + atomic_t event_handling; + + /* serialise access to fops.*/ + struct mutex fops_lock; + atomic_t in_use; + wait_queue_head_t close_waitq; + + /* when the endpoints are undergoing shutdown.*/ + atomic_t shutdown; + + /* signal eps driver context on ep in use.*/ + atomic_t *eps_in_use; + wait_queue_head_t *eps_close_waitq; + + /* pci client handle.*/ + void *pci_client_h; + + /* stream extensions.*/ + struct stream_ext_params stream_ext_params; + void *stream_ext_h; + struct platform_device *host1x_pdev; +}; + +/* Overall context for the endpoint sub-module of nvscic2c-pcie driver.*/ +struct endpoint_drv_ctx_t { + char drv_name[NAME_MAX]; + + /* entire char device region allocated for all endpoints.*/ + dev_t char_dev; + + /* every endpoint char device will be registered to this class.*/ + struct class *class; + + /* array of nvscic2c-pcie endpoint logical devices.*/ + u8 nr_endpoint; + struct endpoint_t *endpoints; + + /* nvscic2c-pcie DT node reference, used in getting syncpoint shim. */ + struct device_node *of_node; + + /* total count of endpoints opened/in-use.*/ + atomic_t eps_in_use; + wait_queue_head_t eps_close_waitq; +}; + +/* + * pci-client would raise this callback only when there is change + * in PCIe link status(up->down OR down->up). + */ +static void +link_event_callback(void *event_type, void *ctx); + +/* prototype. */ +static void +enable_event_handling(struct endpoint_t *endpoint); + +/* prototype. */ +static int +disable_event_handling(struct endpoint_t *endpoint); + +/* prototype. */ +static int +ioctl_notify_remote_impl(struct endpoint_t *endpoint); + +/* prototype. */ +static int +ioctl_get_info_impl(struct endpoint_t *endpoint, + struct nvscic2c_pcie_endpoint_info *get_info); + +/* + * open() syscall backing for nvscic2c-pcie endpoint devices. + * + * Populate the endpoint_device internal data-structure into fops private data + * for subsequent calls to other fops handlers. + */ +static int +endpoint_fops_open(struct inode *inode, struct file *filp) +{ + int ret = 0; + enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN; + struct endpoint_t *endpoint = + container_of(inode->i_cdev, struct endpoint_t, cdev); + + mutex_lock(&endpoint->fops_lock); + + if (atomic_read(&endpoint->in_use)) { + /* already in use.*/ + ret = -EBUSY; + goto err; + } + + if (atomic_read(&endpoint->shutdown)) { + /* do not open when module is undergoing shutdown.*/ + ret = -ESHUTDOWN; + goto err; + } + + link = pci_client_query_link_status(endpoint->pci_client_h); + if (link != NVSCIC2C_PCIE_LINK_UP) { + /* do not open when link is not established.*/ + ret = -ENOLINK; + goto err; + } + + /* create stream extension handle.*/ + ret = stream_extension_init(&endpoint->stream_ext_params, + &endpoint->stream_ext_h); + if (ret) { + pr_err("Failed setting up stream extension handle: (%s)\n", + endpoint->name); + goto err; + } + + /* start link, data event handling.*/ + enable_event_handling(endpoint); + + atomic_set(&endpoint->in_use, 1); + filp->private_data = endpoint; + + /* + * increment the total opened endpoints in endpoint_drv_ctx_t. 
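+ * This counter is what endpoints_waitfor_close() blocks on at teardown, so
+ * every successful open() here is balanced by the atomic_dec_and_test() in
+ * endpoint_fops_release().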
+ * Doesn't need to be guarded in lock, atomic variable. + */ + atomic_inc(endpoint->eps_in_use); +err: + mutex_unlock(&endpoint->fops_lock); + return ret; +} + +/* close() syscall backing for nvscic2c-pcie endpoint devices.*/ +static int +endpoint_fops_release(struct inode *inode, struct file *filp) +{ + int ret = 0; + struct endpoint_t *endpoint = filp->private_data; + + if (!endpoint) + return ret; + + mutex_lock(&endpoint->fops_lock); + filp->private_data = NULL; + disable_event_handling(endpoint); + stream_extension_deinit(&endpoint->stream_ext_h); + atomic_set(&endpoint->in_use, 0); + if (atomic_dec_and_test(endpoint->eps_in_use)) + wake_up_interruptible_all(endpoint->eps_close_waitq); + mutex_unlock(&endpoint->fops_lock); + + return ret; +} + +/* + * mmap() syscall backing for nvscic2c-pcie endpoint device. + * + * We support mapping following distinct regions of memory: + * - Peer's memory for same endpoint(used for Tx), + * - Self's memory (used for Rx), + * - pci-client link status memory. + * + * We map just one segment of memory in each call based on the information + * (which memory segment) provided by user-space code. + */ +static int +endpoint_fops_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct endpoint_t *endpoint = filp->private_data; + u64 mmap_type = vma->vm_pgoff; + u64 memaddr = 0x0; + u64 memsize = 0x0; + int ret = 0; + + if (WARN_ON(!endpoint)) + return -EFAULT; + + if (WARN_ON(!(vma))) + return -EFAULT; + + mutex_lock(&endpoint->fops_lock); + if (!atomic_read(&endpoint->in_use)) { + mutex_unlock(&endpoint->fops_lock); + return -EBADF; + } + + switch (mmap_type) { + case PEER_MEM_MMAP: + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + memaddr = endpoint->peer_mem.aper; + memsize = endpoint->peer_mem.size; + break; + case SELF_MEM_MMAP: + memaddr = endpoint->self_mem.phys_addr; + memsize = endpoint->self_mem.size; + break; + case LINK_MEM_MMAP: + if (vma->vm_flags & VM_WRITE) { + ret = -EPERM; + pr_err("(%s): LINK_MEM_MMAP called with WRITE prot\n", + endpoint->name); + goto exit; + } + ret = pci_client_mmap_link_mem(endpoint->pci_client_h, vma); + goto exit; + default: + pr_err("(%s): unrecognised mmap type: (%llu)\n", + endpoint->name, mmap_type); + goto exit; + } + + if ((vma->vm_end - vma->vm_start) != memsize) { + pr_err("(%s): mmap type: (%llu), memsize mismatch\n", + endpoint->name, mmap_type); + goto exit; + } + + vma->vm_pgoff = 0; + vma->vm_flags |= (VM_DONTCOPY); // fork() not supported. + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memsize, vma->vm_page_prot); + if (ret) { + pr_err("(%s): mmap() failed, mmap type:(%llu)\n", + endpoint->name, mmap_type); + } +exit: + mutex_unlock(&endpoint->fops_lock); + return ret; +} + +/* + * poll() syscall backing for nvscic2c-pcie endpoint devices. + * + * user-space code shall call poll with FD on read, write and probably exception + * for endpoint state changes. + * + * If we are able to read(), write() or there is a pending state change event + * to be serviced, we return letting application call get_event(), otherwise + * kernel f/w will wait for poll_waitq activity to occur. 
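+ *
+ * Purely as an illustration (not part of this patch), a minimal consumer of
+ * this contract might look like the sketch below; ep_fd is assumed to be an
+ * endpoint descriptor returned by open(), and because read()/write() are not
+ * supported the client inspects its mmap()ed link and frame memory after
+ * poll() returns instead of expecting an event payload:
+ *
+ *   #include <poll.h>
+ *
+ *   static void ep_wait_loop(int ep_fd)
+ *   {
+ *           struct pollfd pfd = { .fd = ep_fd, .events = POLLIN | POLLPRI };
+ *
+ *           for (;;) {
+ *                   // blocks until a peer notification or a link change
+ *                   if (poll(&pfd, 1, -1) < 0)
+ *                           break;
+ *                   if (pfd.revents & (POLLIN | POLLPRI)) {
+ *                           // re-read the mapped link memory and rx frame
+ *                           // headers here; events carry no payload and
+ *                           // are coalesced into simple counts
+ *                   }
+ *           }
+ *   }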
+ */ +static __poll_t +endpoint_fops_poll(struct file *filp, poll_table *wait) +{ + __poll_t mask = 0; + struct endpoint_t *endpoint = filp->private_data; + + if (WARN_ON(!endpoint)) + return POLLNVAL; + + mutex_lock(&endpoint->fops_lock); + if (!atomic_read(&endpoint->in_use)) { + mutex_unlock(&endpoint->fops_lock); + return POLLNVAL; + } + + /* add all waitq if they are different for read, write & link+state.*/ + poll_wait(filp, &endpoint->poll_waitq, wait); + + /* + * wake up read, write (& exception - those who want to use) fd on + * getting Link + peer notifications. + */ + if (atomic_read(&endpoint->linkevent_count)) { + atomic_dec(&endpoint->linkevent_count); + mask = (POLLPRI | POLLIN | POLLOUT); + } else if (atomic_read(&endpoint->dataevent_count)) { + atomic_dec(&endpoint->dataevent_count); + mask = (POLLPRI | POLLIN | POLLOUT); + } + + mutex_unlock(&endpoint->fops_lock); + + return mask; +} + +/* ioctl() syscall backing for nvscic2c-pcie endpoint device. */ +#define MAX_IOCTL_ARG_SIZE (sizeof(union nvscic2c_pcie_ioctl_arg_max_size)) +static long +endpoint_fops_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int ret = 0; + u8 buf[MAX_IOCTL_ARG_SIZE] __aligned(sizeof(u64)) = {0}; + struct endpoint_t *endpoint = filp->private_data; + + if (WARN_ON(!endpoint)) + return -EFAULT; + + if (WARN_ON(_IOC_TYPE(cmd) != NVSCIC2C_PCIE_IOCTL_MAGIC || + _IOC_NR(cmd) == 0 || + _IOC_NR(cmd) > NVSCIC2C_PCIE_IOCTL_NUMBER_MAX) || + _IOC_SIZE(cmd) > MAX_IOCTL_ARG_SIZE) + return -ENOTTY; + + /* copy the cmd if it was meant from user->kernel. */ + (void)memset(buf, 0, sizeof(buf)); + if (_IOC_DIR(cmd) & _IOC_WRITE) { + if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) + return -EFAULT; + } + + mutex_lock(&endpoint->fops_lock); + if (!atomic_read(&endpoint->in_use)) { + mutex_unlock(&endpoint->fops_lock); + return -EBADF; + } + switch (cmd) { + case NVSCIC2C_PCIE_IOCTL_GET_INFO: + ret = ioctl_get_info_impl + (endpoint, (struct nvscic2c_pcie_endpoint_info *)buf); + break; + case NVSCIC2C_PCIE_IOCTL_NOTIFY_REMOTE: + ret = ioctl_notify_remote_impl(endpoint); + break; + default: + ret = stream_extension_ioctl(endpoint->stream_ext_h, cmd, buf); + break; + } + mutex_unlock(&endpoint->fops_lock); + + /* copy the cmd result back to user if it was kernel->user: get_info.*/ + if (ret == 0 && (_IOC_DIR(cmd) & _IOC_READ)) + ret = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); + return ret; +} + +/* + * All important endpoint dev node properites required for user-space + * to map the channel memory and work without going to LKM for data + * xfer are exported in this ioctl implementation. + * + * Because we export different memory for a single nvscic2c-pcie endpoint, + * export the memory regions as masked offsets. + */ +static int +ioctl_get_info_impl(struct endpoint_t *endpoint, + struct nvscic2c_pcie_endpoint_info *get_info) +{ + if (endpoint->peer_mem.size > U32_MAX || + endpoint->self_mem.size > U32_MAX) + return -EINVAL; + + get_info->nframes = endpoint->nframes; + get_info->frame_size = endpoint->frame_sz; + get_info->peer.offset = (PEER_MEM_MMAP << PAGE_SHIFT); + get_info->peer.size = endpoint->peer_mem.size; + get_info->self.offset = (SELF_MEM_MMAP << PAGE_SHIFT); + get_info->self.size = endpoint->self_mem.size; + get_info->link.offset = (LINK_MEM_MMAP << PAGE_SHIFT); + get_info->link.size = PAGE_ALIGN(sizeof(enum nvscic2c_pcie_link)); + + return 0; +} + +/* + * implement NVSCIC2C_PCIE_IOCTL_NOTIFY_REMOTE ioctl call. 
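+ *
+ * As an illustration of how the masked offsets returned by GET_INFO and this
+ * ioctl fit together, a hedged user-space sketch follows (not part of this
+ * patch; the device node name is hypothetical, the nvscic2c-pcie uapi header
+ * providing the ioctl definitions is assumed to be included, and most error
+ * handling is omitted):
+ *
+ *   #include <fcntl.h>
+ *   #include <sys/ioctl.h>
+ *   #include <sys/mman.h>
+ *
+ *   static int ep_map_and_notify(void)
+ *   {
+ *           struct nvscic2c_pcie_endpoint_info info = {0};
+ *           int fd = open("/dev/ep0", O_RDWR | O_CLOEXEC);
+ *
+ *           if (fd < 0 || ioctl(fd, NVSCIC2C_PCIE_IOCTL_GET_INFO, &info))
+ *                   return -1;
+ *
+ *           // tx: peer's receive memory, reached through the PCIe aperture
+ *           void *tx = mmap(NULL, info.peer.size, PROT_READ | PROT_WRITE,
+ *                           MAP_SHARED, fd, info.peer.offset);
+ *           // rx: local memory the peer writes into
+ *           void *rx = mmap(NULL, info.self.size, PROT_READ | PROT_WRITE,
+ *                           MAP_SHARED, fd, info.self.offset);
+ *           // link state: read-only by design, writable mappings get EPERM
+ *           void *link = mmap(NULL, info.link.size, PROT_READ,
+ *                             MAP_SHARED, fd, info.link.offset);
+ *           (void)tx; (void)rx; (void)link; // frame/link handling elided
+ *
+ *           // after producing a frame into tx, ring the peer's doorbell
+ *           return ioctl(fd, NVSCIC2C_PCIE_IOCTL_NOTIFY_REMOTE);
+ *   }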
+ */ +static int +ioctl_notify_remote_impl(struct endpoint_t *endpoint) +{ + int ret = 0; + enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN; + struct syncpt_t *syncpt = &endpoint->syncpt; + enum peer_cpu_t peer_cpu = NVCPU_ORIN; + + link = pci_client_query_link_status(endpoint->pci_client_h); + peer_cpu = pci_client_get_peer_cpu(endpoint->pci_client_h); + + if (link != NVSCIC2C_PCIE_LINK_UP) + return -ENOLINK; + + if (peer_cpu == NVCPU_X86_64) { + ret = pci_client_raise_irq(endpoint->pci_client_h, PCI_EPC_IRQ_MSI, + endpoint->msi_irq); + } else { + /* + * increment peer's syncpoint. Write of any 4-byte value + * increments remote's syncpoint shim by 1. + */ + writel(0x1, syncpt->peer_mem.pva); + } + + return ret; +} + +static void +enable_event_handling(struct endpoint_t *endpoint) +{ + /* + * propagate link and state change events that occur after the device + * is opened and not the stale ones. + */ + atomic_set(&endpoint->dataevent_count, 0); + atomic_set(&endpoint->linkevent_count, 0); + atomic_set(&endpoint->event_handling, 1); +} + +static int +disable_event_handling(struct endpoint_t *endpoint) +{ + int ret = 0; + + if (!endpoint) + return ret; + + atomic_set(&endpoint->event_handling, 0); + atomic_set(&endpoint->linkevent_count, 0); + atomic_set(&endpoint->dataevent_count, 0); + + return ret; +} + +static void +link_event_callback(void *data, void *ctx) +{ + struct endpoint_t *endpoint = NULL; + + if (!ctx) { + pr_err("Spurious link event callback\n"); + return; + } + + endpoint = (struct endpoint_t *)(ctx); + + /* notify only if the endpoint was openend.*/ + if (atomic_read(&endpoint->event_handling)) { + atomic_inc(&endpoint->linkevent_count); + wake_up_interruptible_all(&endpoint->poll_waitq); + } +} + +static void +host1x_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct syncpt_t *syncpt = container_of(cb, struct syncpt_t, fence_cb); + + schedule_work(&syncpt->work); +} + +static int +allocate_fence(struct syncpt_t *syncpt) +{ + int ret = 0; + struct dma_fence *fence = NULL; + + fence = host1x_fence_create(syncpt->sp, ++syncpt->threshold, false); + if (IS_ERR(fence)) { + ret = PTR_ERR(fence); + pr_err("host1x_fence_create failed with: %d\n", ret); + return ret; + } + + mutex_lock(&syncpt->lock); + ret = dma_fence_add_callback(fence, &syncpt->fence_cb, host1x_cb_func); + if (ret != 0) { + if (ret == -ENOENT) { + ret = 0; + schedule_work(&syncpt->work); + } + goto put_fence; + } + syncpt->fence = fence; + mutex_unlock(&syncpt->lock); + + return ret; + +put_fence: + dma_fence_put(fence); + mutex_unlock(&syncpt->lock); + return ret; +} + +static void +fence_do_work(struct work_struct *work) +{ + int ret = 0; + struct syncpt_t *syncpt = container_of(work, struct syncpt_t, work); + + if (syncpt->notifier) + syncpt->notifier(syncpt->notifier_data); + + mutex_lock(&syncpt->lock); + /* If deinit triggered, no need to proceed. */ + if (syncpt->fence_release) + return; + if (syncpt->fence) { + dma_fence_put(syncpt->fence); + syncpt->fence = NULL; + } + mutex_unlock(&syncpt->lock); + + ret = allocate_fence(syncpt); + if (ret != 0) { + mutex_unlock(&syncpt->lock); + pr_err("allocate_fence failed with: %d\n", ret); + return; + } +} + +/* + * Callback registered with Syncpoint shim, shall be invoked + * on expiry of syncpoint shim fence/trigger from remote. + */ +static void +syncpt_callback(void *data) +{ + /* Skip args ceck, trusting host1x. 
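+ *
+ * For clarity, the notification path that ends here re-arms itself in a
+ * loop (this only summarises the code above, no new behaviour is implied):
+ *
+ *   1. allocate_fence(): fence = host1x_fence_create(sp, ++threshold), then
+ *      dma_fence_add_callback(fence, &syncpt->fence_cb, host1x_cb_func);
+ *      -ENOENT means the threshold was already crossed, so the work is
+ *      scheduled right away instead.
+ *   2. host1x_cb_func(): schedule_work(&syncpt->work).
+ *   3. fence_do_work(): runs this notifier (waking pollers), drops the old
+ *      fence and calls allocate_fence() again for the next increment.
+ *
+ * Each 4-byte write by the peer into the syncpoint aperture advances the
+ * syncpoint by one, so the loop delivers roughly one notification per peer
+ * write.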
*/ + + struct endpoint_t *endpoint = (struct endpoint_t *)(data); + + /* notify only if the endpoint was openend - else drain.*/ + if (atomic_read(&endpoint->event_handling)) { + atomic_inc(&endpoint->dataevent_count); + wake_up_interruptible_all(&endpoint->poll_waitq); + } +} + +/* + * unpin/unmap and free the syncpoints allocated. + */ +static void +free_syncpoint(struct endpoint_drv_ctx_t *eps_ctx, + struct endpoint_t *endpoint) +{ + int ret = 0; + struct syncpt_t *syncpt = NULL; + + if (!eps_ctx || !endpoint) + return; + + syncpt = &endpoint->syncpt; + + if (syncpt->host1x_cb_set) { + /* Remove dma fence callback. */ + mutex_lock(&syncpt->lock); + syncpt->fence_release = true; + if (syncpt->fence) { + ret = dma_fence_remove_callback(syncpt->fence, + &syncpt->fence_cb); + if (ret) { + /* + * If dma_fence_remove_callback() returns true + * means callback is removed successfully. + * Cancel the fence to drop the refcount. + */ + host1x_fence_cancel(syncpt->fence); + } + dma_fence_put(syncpt->fence); + syncpt->fence = NULL; + } + mutex_unlock(&syncpt->lock); + cancel_work_sync(&syncpt->work); + mutex_destroy(&syncpt->lock); + } + syncpt->host1x_cb_set = false; + + if (syncpt->peer_mem.pva) { + iounmap(syncpt->peer_mem.pva); + syncpt->peer_mem.pva = NULL; + } + + if (syncpt->mapped_iova) { + pci_client_unmap_addr(endpoint->pci_client_h, + syncpt->iova, syncpt->size); + syncpt->mapped_iova = false; + } + + if (syncpt->iova_block_h) { + pci_client_free_iova(endpoint->pci_client_h, + &syncpt->iova_block_h); + syncpt->iova_block_h = NULL; + } + + if (syncpt->sp) { + host1x_syncpt_put(syncpt->sp); + syncpt->sp = NULL; + } +} + +/* Allocate syncpoint shim for the endpoint. Subsequently, map/pin + * them to PCIe BAR backing. + */ +static int +allocate_syncpoint(struct endpoint_drv_ctx_t *eps_ctx, + struct endpoint_t *endpoint) +{ + int ret = 0; + int prot = 0; + struct host1x *host1x = NULL; + struct syncpt_t *syncpt = NULL; + size_t offsetof = 0x0; + + syncpt = &endpoint->syncpt; + + host1x = platform_get_drvdata(endpoint->host1x_pdev); + if (!host1x) { + pr_err("Could not get host1x handle from host1x_pdev.\n"); + return -EINVAL; + } + + syncpt->sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, + endpoint->name); + if (IS_ERR_OR_NULL(syncpt->sp)) { + pr_err("(%s): Failed to reserve syncpt\n", endpoint->name); + return -ENOMEM; + } + + syncpt->id = host1x_syncpt_id(syncpt->sp); + /* physical address of syncpoint shim. */ + syncpt->phy_addr = get_syncpt_shim_offset(syncpt->id); + syncpt->size = SP_MAP_SIZE; + + /* reserve iova with the iova manager.*/ + ret = pci_client_alloc_iova(endpoint->pci_client_h, syncpt->size, + &syncpt->iova, &offsetof, + &syncpt->iova_block_h); + if (ret) { + pr_err("(%s): Err reserving iova region of size(SP): (%lu)\n", + endpoint->name, syncpt->size); + goto err; + } + + /* map the pages to the reserved iova.*/ + prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE); + ret = pci_client_map_addr(endpoint->pci_client_h, syncpt->iova, + syncpt->phy_addr, syncpt->size, prot); + if (ret) { + pr_err("(%s): Failed to map SP physical addr to reserved iova\n", + endpoint->name); + goto err; + } + syncpt->mapped_iova = true; + + pr_debug("(%s): mapped phy:0x%pa[p]+0x%lx to iova:0x%llx\n", + endpoint->name, &syncpt->phy_addr, syncpt->size, syncpt->iova); + + /* get peer's aperture offset. 
Map tx (pcie aper for notif tx.)*/ + syncpt->peer_mem.size = syncpt->size; + ret = pci_client_get_peer_aper(endpoint->pci_client_h, offsetof, + syncpt->peer_mem.size, + &syncpt->peer_mem.aper); + if (ret) { + pr_err("Failed to get comm peer's syncpt pcie aperture\n"); + goto err; + } + + syncpt->peer_mem.pva = ioremap(syncpt->peer_mem.aper, + syncpt->peer_mem.size); + if (!syncpt->peer_mem.pva) { + ret = -ENOMEM; + pr_err("(%s): Failed to ioremap peer's syncpt pcie aperture\n", + endpoint->name); + goto err; + } + + syncpt->threshold = host1x_syncpt_read(syncpt->sp); + + /* enable syncpt notifications handling from peer.*/ + mutex_init(&syncpt->lock); + syncpt->notifier = syncpt_callback; + syncpt->notifier_data = (void *)endpoint; + INIT_WORK(&syncpt->work, fence_do_work); + syncpt->host1x_cb_set = true; + syncpt->fence_release = false; + + ret = allocate_fence(syncpt); + if (ret != 0) { + pr_err("allocate_fence failed with: %d\n", ret); + goto err; + } + + return ret; +err: + free_syncpoint(eps_ctx, endpoint); + return ret; +} + +/* unmap the memory from PCIe BAR iova and free the allocated physical pages. */ +static void +free_memory(struct endpoint_drv_ctx_t *eps_ctx, struct endpoint_t *endpoint) +{ + if (!eps_ctx || !endpoint) + return; + + if (endpoint->mapped_iova) { + pci_client_unmap_addr(endpoint->pci_client_h, + endpoint->iova, endpoint->self_mem.size); + endpoint->mapped_iova = false; + } + + if (endpoint->iova_block_h) { + pci_client_free_iova(endpoint->pci_client_h, + &endpoint->iova_block_h); + endpoint->iova_block_h = NULL; + } + + if (endpoint->self_mem.pva) { + free_pages_exact(endpoint->self_mem.pva, + endpoint->self_mem.size); + endpoint->self_mem.pva = NULL; + } +} + +/* + * allocate coniguous physical memory for endpoint. This shall be mapped + * to PCIe BAR iova. + */ +static int +allocate_memory(struct endpoint_drv_ctx_t *eps_ctx, struct endpoint_t *ep) +{ + int ret = 0; + int prot = 0; + size_t offsetof = 0x0; + + /* + * memory size includes space for frames(aligned to PAGE_SIZE) plus + * one additional PAGE for frames header (managed/used by user-space). + */ + ep->self_mem.size = (ep->nframes * ep->frame_sz); + ep->self_mem.size = ALIGN(ep->self_mem.size, PAGE_SIZE); + ep->self_mem.size += PAGE_SIZE; + ep->self_mem.pva = alloc_pages_exact(ep->self_mem.size, + (GFP_KERNEL | __GFP_ZERO)); + if (!ep->self_mem.pva) { + ret = -ENOMEM; + pr_err("(%s): Error allocating: (%lu) contiguous pages\n", + ep->name, (ep->self_mem.size >> PAGE_SHIFT)); + goto err; + } + ep->self_mem.phys_addr = page_to_phys(virt_to_page(ep->self_mem.pva)); + pr_debug("(%s): physical page allocated at:(0x%pa[p]+0x%lx)\n", + ep->name, &ep->self_mem.phys_addr, ep->self_mem.size); + + /* reserve iova with the iova manager.*/ + ret = pci_client_alloc_iova(ep->pci_client_h, ep->self_mem.size, + &ep->iova, &offsetof, &ep->iova_block_h); + if (ret) { + pr_err("(%s): Failed to reserve iova region of size: 0x%lx\n", + ep->name, ep->self_mem.size); + goto err; + } + + /* map the pages to the reserved iova.*/ + prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE); + ret = pci_client_map_addr(ep->pci_client_h, ep->iova, + ep->self_mem.phys_addr, ep->self_mem.size, + prot); + if (ret) { + pr_err("(%s): Failed to map physical page to reserved iova\n", + ep->name); + goto err; + } + ep->mapped_iova = true; + + pr_debug("(%s): mapped page:0x%pa[p]+0x%lx to iova:0x%llx\n", ep->name, + &ep->self_mem.phys_addr, ep->self_mem.size, ep->iova); + + /* get peer's aperture offset. 
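+ * (As a worked example of the sizing above, with made-up numbers and 4 KiB
+ * pages assumed: an endpoint declared with nframes = 16 and frame_sz = 32768
+ * gets 16 * 32768 = 0x80000 bytes of frame area, already page aligned, plus
+ * one 0x1000 header page, so self_mem.size = 0x81000; peer_mem.size below
+ * mirrors it so the identical layout is reachable through the outbound
+ * aperture.)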
Used in mmaping tx mem.*/ + ep->peer_mem.size = ep->self_mem.size; + ret = pci_client_get_peer_aper(ep->pci_client_h, offsetof, + ep->peer_mem.size, &ep->peer_mem.aper); + if (ret) { + pr_err("Failed to get peer's endpoint pcie aperture\n"); + goto err; + } + + return ret; +err: + free_memory(eps_ctx, ep); + return ret; +} + +/* + * Set of per-endpoint char device file operations. Do not support: + * read() and write() on nvscic2c-pcie endpoint descriptors. + */ +static const struct file_operations endpoint_fops = { + .owner = THIS_MODULE, + .open = endpoint_fops_open, + .release = endpoint_fops_release, + .mmap = endpoint_fops_mmap, + .unlocked_ioctl = endpoint_fops_ioctl, + .poll = endpoint_fops_poll, + .llseek = noop_llseek, +}; + +/* Clean up the endpoint devices. */ +static int +remove_endpoint_device(struct endpoint_drv_ctx_t *eps_ctx, + struct endpoint_t *endpoint) +{ + int ret = 0; + + if (!eps_ctx || !endpoint) + return ret; + + pci_client_unregister_for_link_event(endpoint->pci_client_h, + endpoint->linkevent_id); + free_syncpoint(eps_ctx, endpoint); + free_memory(eps_ctx, endpoint); + if (endpoint->device) { + device_destroy(eps_ctx->class, endpoint->dev); + cdev_del(&endpoint->cdev); + endpoint->device = NULL; + } + mutex_destroy(&endpoint->fops_lock); + return ret; +} + +/* Create the nvscic2c-pcie endpoint devices for the user-space to: + * - Map the endpoints Self and Peer area. + * - send notifications to remote/peer. + * - receive notifications from peer. + */ +static int +create_endpoint_device(struct endpoint_drv_ctx_t *eps_ctx, + struct endpoint_t *endpoint) +{ + int ret = 0; + struct callback_ops ops = {0}; + + /* initialise the endpoint internals.*/ + mutex_init(&endpoint->fops_lock); + atomic_set(&endpoint->in_use, 0); + atomic_set(&endpoint->shutdown, 0); + init_waitqueue_head(&endpoint->poll_waitq); + init_waitqueue_head(&endpoint->close_waitq); + + /* create the nvscic2c endpoint char device.*/ + endpoint->dev = MKDEV(MAJOR(eps_ctx->char_dev), endpoint->minor); + cdev_init(&endpoint->cdev, &endpoint_fops); + endpoint->cdev.owner = THIS_MODULE; + ret = cdev_add(&endpoint->cdev, endpoint->dev, 1); + if (ret != 0) { + pr_err("(%s): cdev_add() failed\n", endpoint->name); + goto err; + } + /* parent is this hvd dev */ + endpoint->device = device_create(eps_ctx->class, NULL, + endpoint->dev, endpoint, + endpoint->name); + if (IS_ERR(endpoint->device)) { + cdev_del(&endpoint->cdev); + ret = PTR_ERR(endpoint->device); + pr_err("(%s): device_create() failed\n", endpoint->name); + goto err; + } + dev_set_drvdata(endpoint->device, endpoint); + + /* allocate physical pages for the endpoint PCIe BAR (rx) area.*/ + ret = allocate_memory(eps_ctx, endpoint); + if (ret) { + pr_err("(%s): Failed to allocate physical pages\n", + endpoint->name); + goto err; + } + + /* allocate syncpoint for notification.*/ + ret = allocate_syncpoint(eps_ctx, endpoint); + if (ret) { + pr_err("(%s): Failed to allocate syncpt shim for notifications\n", + endpoint->name); + goto err; + } + + /* Register for link events.*/ + ops.callback = &(link_event_callback); + ops.ctx = (void *)(endpoint); + ret = pci_client_register_for_link_event(endpoint->pci_client_h, &ops, + &endpoint->linkevent_id); + if (ret) { + pr_err("(%s): Failed to register for PCIe link events\n", + endpoint->name); + goto err; + } + + /* all okay.*/ + return ret; +err: + remove_endpoint_device(eps_ctx, endpoint); + return ret; +} + +/* + * Entry point for the endpoint(s) char device sub-module/abstraction. 
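+ * (Each char device is registered under the endpoint name taken from the DT
+ * endpoint database, so with devtmpfs/udev the nodes typically surface as
+ * /dev/<endpoint-name>; endpoint i is also assigned MSI-X vector i + 1 for
+ * notifying an x86 peer, vector 0 being reserved for the comm-channel.)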
+ * + * On successful return (0), devices would have been created and ready to + * accept ioctls from user-space application. + */ +int +endpoints_setup(struct driver_ctx_t *drv_ctx, void **endpoints_h) +{ + u32 i = 0; + int ret = 0; + struct endpoint_t *endpoint = NULL; + struct endpoint_prop_t *ep_prop = NULL; + struct endpoint_drv_ctx_t *eps_ctx = NULL; + struct stream_ext_params *stream_ext_params = NULL; + + /* this cannot be initialized again.*/ + if (WARN_ON(!drv_ctx || !endpoints_h || *endpoints_h)) + return -EINVAL; + + if (WARN_ON(drv_ctx->drv_param.nr_endpoint == 0 || + drv_ctx->drv_param.nr_endpoint > MAX_ENDPOINTS)) + return -EINVAL; + + if (WARN_ON(strlen(drv_ctx->drv_name) > (NAME_MAX - 1))) + return -EINVAL; + + /* start by allocating the endpoint driver (global for all eps) ctx.*/ + eps_ctx = kzalloc(sizeof(*eps_ctx), GFP_KERNEL); + if (WARN_ON(!eps_ctx)) + return -ENOMEM; + + eps_ctx->nr_endpoint = drv_ctx->drv_param.nr_endpoint; + eps_ctx->of_node = drv_ctx->drv_param.of_node; + strcpy(eps_ctx->drv_name, drv_ctx->drv_name); + init_waitqueue_head(&eps_ctx->eps_close_waitq); + + /* allocate the whole chardev range */ + ret = alloc_chrdev_region(&eps_ctx->char_dev, 0, + eps_ctx->nr_endpoint, eps_ctx->drv_name); + if (ret < 0) + goto err; + + eps_ctx->class = class_create(THIS_MODULE, eps_ctx->drv_name); + if (IS_ERR_OR_NULL(eps_ctx->class)) { + ret = PTR_ERR(eps_ctx->class); + goto err; + } + + /* allocate char devices context for supported endpoints.*/ + eps_ctx->endpoints = kzalloc((eps_ctx->nr_endpoint * + sizeof(*eps_ctx->endpoints)), + GFP_KERNEL); + if (WARN_ON(!eps_ctx->endpoints)) { + ret = -ENOMEM; + goto err; + } + + /* create char devices for each endpoint.*/ + for (i = 0; i < eps_ctx->nr_endpoint; i++) { + endpoint = &eps_ctx->endpoints[i]; + ep_prop = &drv_ctx->drv_param.endpoint_props[i]; + stream_ext_params = &endpoint->stream_ext_params; + + /* copy the parameters from nvscic2c-pcie driver ctx.*/ + strcpy(endpoint->name, ep_prop->name); + endpoint->minor = ep_prop->id; + endpoint->nframes = ep_prop->nframes; + endpoint->frame_sz = ep_prop->frame_sz; + endpoint->pci_client_h = drv_ctx->pci_client_h; + endpoint->eps_in_use = &eps_ctx->eps_in_use; + endpoint->eps_close_waitq = &eps_ctx->eps_close_waitq; + endpoint->host1x_pdev = drv_ctx->drv_param.host1x_pdev; + /* set index of the msi-x interruper vector + * where the first one is reserved for comm-channel + */ + endpoint->msi_irq = i + 1; + stream_ext_params->local_node = &drv_ctx->drv_param.local_node; + stream_ext_params->peer_node = &drv_ctx->drv_param.peer_node; + stream_ext_params->host1x_pdev = drv_ctx->drv_param.host1x_pdev; + stream_ext_params->pci_client_h = drv_ctx->pci_client_h; + stream_ext_params->comm_channel_h = drv_ctx->comm_channel_h; + stream_ext_params->vmap_h = drv_ctx->vmap_h; + stream_ext_params->edma_h = drv_ctx->edma_h; + stream_ext_params->ep_id = ep_prop->id; + stream_ext_params->ep_name = endpoint->name; + stream_ext_params->drv_mode = drv_ctx->drv_mode; + + /* create nvscic2c-pcie endpoint device.*/ + ret = create_endpoint_device(eps_ctx, endpoint); + if (ret) + goto err; + } + + *endpoints_h = eps_ctx; + return ret; +err: + endpoints_release((void **)&eps_ctx); + return ret; +} + +/* Exit point for nvscic2c-pcie endpoints: Wait for all endpoints to close.*/ +#define MAX_WAITFOR_CLOSE_TIMEOUT_MSEC (5000) +int +endpoints_waitfor_close(void *endpoints_h) +{ + u32 i = 0; + int ret = 0; + long timeout = 0; + struct endpoint_drv_ctx_t *eps_ctx = + (struct endpoint_drv_ctx_t 
*)endpoints_h; + + if (!eps_ctx || !eps_ctx->endpoints) + return ret; + + /* + * Signal all endpoints about exit/shutdown. This also doesn't + * allow them to be opened again unless reinitialized. + */ + for (i = 0; i < eps_ctx->nr_endpoint; i++) { + struct endpoint_t *endpoint = &eps_ctx->endpoints[i]; + + atomic_set(&endpoint->shutdown, 1); + + /* allow fops_open() or fops_release() to complete.*/ + mutex_lock(&endpoint->fops_lock); + mutex_unlock(&endpoint->fops_lock); + } + + /* wait for endpoints to be closed. */ + while (timeout == 0) { + timeout = wait_event_interruptible_timeout + (eps_ctx->eps_close_waitq, + !(atomic_read(&eps_ctx->eps_in_use)), + msecs_to_jiffies(MAX_WAITFOR_CLOSE_TIMEOUT_MSEC)); + + for (i = 0; i < eps_ctx->nr_endpoint; i++) { + struct endpoint_t *endpoint = &eps_ctx->endpoints[i]; + + if (atomic_read(&endpoint->in_use)) { + if (timeout == -ERESTARTSYS) { + ret = timeout; + pr_err("(%s): Wait for endpoint:(%s) close - Interrupted\n", + eps_ctx->drv_name, endpoint->name); + } else if (timeout == 0) { + pr_err("(%s): Still waiting for endpoint:(%s) to close\n", + eps_ctx->drv_name, endpoint->name); + } else { + /* erroneous case - should not happen.*/ + ret = -EFAULT; + pr_err("(%s): Error: Endpoint: (%s) is still open\n", + eps_ctx->drv_name, endpoint->name); + } + } + } + } + + return ret; +} + +/* exit point for nvscic2c-pcie endpoints char device sub-module/abstraction.*/ +int +endpoints_release(void **endpoints_h) +{ + u32 i = 0; + int ret = 0; + struct endpoint_drv_ctx_t *eps_ctx = + (struct endpoint_drv_ctx_t *)(*endpoints_h); + if (!eps_ctx) + return ret; + + /* all endpoints must be closed.*/ + if (atomic_read(&eps_ctx->eps_in_use)) + pr_err("(%s): Unexpected. Endpoint(s) are still in-use.\n", + eps_ctx->drv_name); + + /* remove all the endpoints char devices.*/ + if (eps_ctx->endpoints) { + for (i = 0; i < eps_ctx->nr_endpoint; i++) { + struct endpoint_t *endpoint = + &eps_ctx->endpoints[i]; + remove_endpoint_device(eps_ctx, endpoint); + } + kfree(eps_ctx->endpoints); + eps_ctx->endpoints = NULL; + } + + if (eps_ctx->class) { + class_destroy(eps_ctx->class); + eps_ctx->class = NULL; + } + + if (eps_ctx->char_dev) { + unregister_chrdev_region(eps_ctx->char_dev, + eps_ctx->nr_endpoint); + eps_ctx->char_dev = 0; + } + + kfree(eps_ctx); + *endpoints_h = NULL; + + return ret; +} diff --git a/drivers/misc/nvscic2c-pcie/endpoint.h b/drivers/misc/nvscic2c-pcie/endpoint.h new file mode 100644 index 00000000..51d82a3f --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/endpoint.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __ENDPOINT_H__ +#define __ENDPOINT_H__ + +#include "common.h" + +/* forward declaration. */ +struct driver_ctx_t; + +/* + * Entry point for the endpoint(s) char device sub-module/abstraction. + * + * On successful return (0), devices would have been created and ready to + * accept ioctls from user-space application. 
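+ *
+ * The intended call order, as exercised by the EPC and EPF module glue in
+ * this patch, is:
+ *
+ *   endpoints_setup(drv_ctx, &endpoints_h);   // probe/bind path
+ *
+ *   // teardown path, after marking the link DOWN and stopping eDMA:
+ *   endpoints_waitfor_close(endpoints_h);
+ *   endpoints_release(&endpoints_h);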
+ */ +int +endpoints_setup(struct driver_ctx_t *drv_ctx, void **endpoints_h); + +/* Exit point for nvscic2c-pcie endpoints: Wait for all endpoints to close.*/ +int +endpoints_waitfor_close(void *endpoints_h); + +/* exit point for nvscic2c-pcie endpoints char device sub-module/abstraction.*/ +int +endpoints_release(void **endpoints_h); +#endif /*__ENDPOINT_H__ */ diff --git a/drivers/misc/nvscic2c-pcie/epc/module.c b/drivers/misc/nvscic2c-pcie/epc/module.c new file mode 100644 index 00000000..62fc0371 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/epc/module.c @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: epc: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "comm-channel.h" +#include "common.h" +#include "endpoint.h" +#include "iova-alloc.h" +#include "module.h" +#include "pci-client.h" +#include "vmap.h" + +static const struct pci_device_id nvscic2c_pcie_epc_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_C2C_1) }, + { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_C2C_2) }, + { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_C2C_3) }, + {}, +}; + +/* wrapper over tegra-pcie-edma init api. */ +static int +edma_module_init(struct driver_ctx_t *drv_ctx) +{ + u8 i = 0; + int ret = 0; + struct tegra_pcie_edma_init_info info = {0}; + + if (WARN_ON(!drv_ctx || !drv_ctx->drv_param.edma_np)) + return -EINVAL; + + memset(&info, 0x0, sizeof(info)); + info.np = drv_ctx->drv_param.edma_np; + info.edma_remote = NULL; + for (i = 0; i < DMA_WR_CHNL_NUM; i++) { + info.tx[i].ch_type = EDMA_CHAN_XFER_ASYNC; + info.tx[i].num_descriptors = NUM_EDMA_DESC; + } + /*No use-case for RD channels.*/ + + drv_ctx->edma_h = tegra_pcie_edma_initialize(&info); + if (!drv_ctx->edma_h) + ret = -ENODEV; + + return ret; +} + +/* should stop any ongoing eDMA transfers.*/ +static void +edma_module_stop(struct driver_ctx_t *drv_ctx) +{ + if (!drv_ctx || !drv_ctx->edma_h) + return; + + tegra_pcie_edma_stop(drv_ctx->edma_h); +} + +/* should not have any ongoing eDMA transfers.*/ +static void +edma_module_deinit(struct driver_ctx_t *drv_ctx) +{ + if (!drv_ctx || !drv_ctx->edma_h) + return; + + tegra_pcie_edma_deinit(drv_ctx->edma_h); + drv_ctx->edma_h = NULL; +} + +static void +free_inbound_area(struct pci_dev *pdev, struct dma_buff_t *self_mem) +{ + struct driver_ctx_t *drv_ctx = NULL; + + if (!pdev || !self_mem) + return; + + drv_ctx = pci_get_drvdata(pdev); + if (!drv_ctx) + return; + + if (self_mem->dma_handle && drv_ctx->ivd_h) + iova_alloc_deinit(self_mem->dma_handle, self_mem->size, + &drv_ctx->ivd_h); + self_mem->dma_handle = 0x0; +} + +/* + * Allocate area visible to PCIe EP/DRV_MODE_EPF. To have symmetry between the + * two modules, even PCIe RP/DRV_MODE_EPC allocates an empty area for all writes + * from PCIe EP/DRV_MODE_EPF to land into. Also, all CPU access from PCIe EP/ + * DRV_MODE_EPF need be for one continguous region. 
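+ *
+ * Roughly, the resulting data paths are (illustrative summary):
+ *   RP -> EP: CPU writes into the BAR0 aperture (peer_mem.aper) travel over
+ *             PCIe into the memory the endpoint function stitched behind
+ *             its BAR0.
+ *   EP -> RP: writes arrive at the iova reserved here (self_mem.dma_handle,
+ *             shared with @DRV_MODE_EPF through the bootstrap message); the
+ *             IOMMU forwards them to whatever physical pages
+ *             pci_client_map_addr() placed at that iova (endpoint frames,
+ *             comm-channel fifo, syncpoint shims).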
+ */ +static int +allocate_inbound_area(struct pci_dev *pdev, size_t win_size, + struct dma_buff_t *self_mem) +{ + int ret = 0; + struct driver_ctx_t *drv_ctx = NULL; + + drv_ctx = pci_get_drvdata(pdev); + if (!drv_ctx) { + pr_err("Could not fetch driver data."); + return -ENOMEM; + } + + self_mem->size = win_size; + ret = iova_alloc_init(&pdev->dev, win_size, &self_mem->dma_handle, + &drv_ctx->ivd_h); + if (ret) { + pr_err("iova_alloc_init() failed for size:(0x%lx)\n", + self_mem->size); + } + + return ret; +} + +static void +free_outbound_area(struct pci_dev *pdev, struct pci_aper_t *peer_mem) +{ + if (!pdev || !peer_mem) + return; + + peer_mem->aper = 0x0; + peer_mem->size = 0; +} + +/* Assign outbound pcie aperture for CPU/eDMA access towards PCIe EP. */ +static int +assign_outbound_area(struct pci_dev *pdev, size_t win_size, + struct pci_aper_t *peer_mem) +{ + int ret = 0; + + peer_mem->size = win_size; + peer_mem->aper = pci_resource_start(pdev, 0); + + return ret; +} + +/* Handle link message from @DRV_MODE_EPF. */ +static void +link_msg_cb(void *data, void *ctx) +{ + struct comm_msg *msg = (struct comm_msg *)data; + struct driver_ctx_t *drv_ctx = (struct driver_ctx_t *)ctx; + + if (WARN_ON(!msg || !drv_ctx)) + return; + + if (msg->u.link.status == NVSCIC2C_PCIE_LINK_UP) { + complete_all(&drv_ctx->epc_ctx->epf_ready_cmpl); + } else if (msg->u.link.status == NVSCIC2C_PCIE_LINK_DOWN) { + complete_all(&drv_ctx->epc_ctx->epf_shutdown_cmpl); + } else { + pr_err("(%s): spurious link message received from EPF\n", + drv_ctx->drv_name); + return; + } + + /* inidicate link status to application.*/ + pci_client_change_link_status(drv_ctx->pci_client_h, + msg->u.link.status); +} + +/* + * PCIe subsystem invokes .shutdown()/.remove() handler when the PCIe EP + * is hot-unplugged (gracefully) or @DRV_MODE_EPC(this) is unloaded while + * the PCIe link was still active or when PCIe EP goes abnormal shutdown/ + * reboot. + */ +#define MAX_EPF_SHUTDOWN_TIMEOUT_MSEC (5000) +static void +nvscic2c_pcie_epc_remove(struct pci_dev *pdev) +{ + int ret = 0; + long timeout = 0; + struct comm_msg msg = {0}; + struct driver_ctx_t *drv_ctx = NULL; + + if (!pdev) + return; + + drv_ctx = pci_get_drvdata(pdev); + if (!drv_ctx) + return; + + /* + * send link down message to EPF. EPF apps can stop processing when + * they see this, else the EPF apps can continue processing as EPC + * waits for its apps to close before sending SHUTDOWN msg. + */ + msg.type = COMM_MSG_TYPE_LINK; + msg.u.link.status = NVSCIC2C_PCIE_LINK_DOWN; + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) + pr_err("(%s): Failed to send LINK(DOWN) message\n", + drv_ctx->drv_name); + + /* local apps can stop processing if they see this.*/ + pci_client_change_link_status(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_LINK_DOWN); + /* + * stop ongoing and pending edma xfers, this edma module shall not + * accept new xfer submissions after this. + */ + edma_module_stop(drv_ctx); + + /* wait for @DRV_MODE_EPC (local) endpoints to close. */ + ret = endpoints_waitfor_close(drv_ctx->endpoints_h); + if (ret) + pr_err("(%s): Error waiting for endpoints to close\n", + drv_ctx->drv_name); + + /* if PCIe EP SoC went away abruptly already, jump to local deinit. */ + if (!pci_device_is_present(pdev)) + goto deinit; + + /* + * Wait for @DRV_MODE_EPF to ACK it's closure too. + * + * Before this @DRV_MODE_EPC(this) endpoints must be closed already + * as @DRV_MODE_EPF in response to this msg shall free all it's + * endpoint mappings. 
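+ *
+ * For clarity, the complete teardown handshake implemented here is:
+ *
+ *   1. EPC -> EPF : COMM_MSG_TYPE_LINK (DOWN), so peer applications stop.
+ *   2. EPC        : marks the link down locally, stops eDMA and waits for
+ *                   its own endpoints to close.
+ *   3. EPC -> EPF : COMM_MSG_TYPE_SHUTDOWN, only if the EP device is still
+ *                   reachable on the bus.
+ *   4. EPF        : its deinit work closes the EPF endpoints and replies
+ *                   with COMM_MSG_TYPE_LINK (DOWN), which completes
+ *                   epf_shutdown_cmpl via link_msg_cb().
+ *   5. EPC        : releases endpoints, eDMA, vmap, comm-channel and
+ *                   pci-client.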
+ */ + reinit_completion(&drv_ctx->epc_ctx->epf_shutdown_cmpl); + msg.type = COMM_MSG_TYPE_SHUTDOWN; + if (comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg)) { + pr_err("(%s): Failed to send shutdown message\n", + drv_ctx->drv_name); + goto deinit; + } + + while (timeout == 0) { + timeout = wait_for_completion_interruptible_timeout + (&drv_ctx->epc_ctx->epf_shutdown_cmpl, + msecs_to_jiffies(MAX_EPF_SHUTDOWN_TIMEOUT_MSEC)); + if (timeout == -ERESTARTSYS) { + pr_err("(%s): Wait for nvscic2c-pcie-epf to close - interrupted\n", + drv_ctx->drv_name); + } else if (timeout == 0) { + /* + * continue wait only if PCIe EP SoC is still there. It can + * go away abruptly waiting for it's own endpoints to close. + */ + if (pci_device_is_present(pdev)) { + pr_err("(%s): Still waiting for nvscic2c-pcie-epf to close\n", + drv_ctx->drv_name); + } else { + pr_debug("(%s): nvscic2c-pcie-epf went away\n", + drv_ctx->drv_name); + break; + } + } else if (timeout > 0) { + pr_debug("(%s): nvscic2c-pcie-epf closed\n", + drv_ctx->drv_name); + } + } + +deinit: + comm_channel_unregister_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_LINK); + endpoints_release(&drv_ctx->endpoints_h); + edma_module_deinit(drv_ctx); + vmap_deinit(&drv_ctx->vmap_h); + comm_channel_deinit(&drv_ctx->comm_channel_h); + pci_client_deinit(&drv_ctx->pci_client_h); + free_outbound_area(pdev, &drv_ctx->peer_mem); + free_inbound_area(pdev, &drv_ctx->self_mem); + + pci_release_region(pdev, 0); + pci_clear_master(pdev); + pci_disable_device(pdev); + + dt_release(&drv_ctx->drv_param); + + pci_set_drvdata(pdev, NULL); + kfree(drv_ctx->epc_ctx); + kfree_const(drv_ctx->drv_name); + kfree(drv_ctx); +} + +#define MAX_EPF_SETUP_TIMEOUT_MSEC (5000) +static int +nvscic2c_pcie_epc_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int ret = 0; + char *name = NULL; + size_t win_size = 0; + struct comm_msg msg = {0}; + unsigned long timeout = 0; + struct callback_ops cb_ops = {0}; + struct driver_ctx_t *drv_ctx = NULL; + struct epc_context_t *epc_ctx = NULL; + struct pci_client_params params = {0}; + + /* allocate module context.*/ + drv_ctx = kzalloc(sizeof(*drv_ctx), GFP_KERNEL); + if (WARN_ON(!drv_ctx)) + return -ENOMEM; + + name = kasprintf(GFP_KERNEL, "%s-%x", DRIVER_NAME_EPC, id->device); + if (WARN_ON(!name)) { + kfree(drv_ctx); + return -ENOMEM; + } + + epc_ctx = kzalloc(sizeof(*epc_ctx), GFP_KERNEL); + if (WARN_ON(!epc_ctx)) { + kfree(name); + kfree(drv_ctx); + return -ENOMEM; + } + init_completion(&epc_ctx->epf_ready_cmpl); + init_completion(&epc_ctx->epf_shutdown_cmpl); + + drv_ctx->drv_mode = DRV_MODE_EPC; + drv_ctx->drv_name = name; + drv_ctx->epc_ctx = epc_ctx; + pci_set_drvdata(pdev, drv_ctx); + + /* check for the device tree node against this Id, must be only one.*/ + ret = dt_parse(id->device, drv_ctx->drv_mode, &drv_ctx->drv_param); + if (ret) + goto err_dt_parse; + + ret = pcim_enable_device(pdev); + if (ret) + goto err_enable_device; + pci_set_master(pdev); + ret = pci_request_region(pdev, 0, MODULE_NAME); + if (ret) + goto err_request_region; + + win_size = pci_resource_len(pdev, 0); + ret = allocate_inbound_area(pdev, win_size, &drv_ctx->self_mem); + if (ret) + goto err_alloc_inbound; + + ret = assign_outbound_area(pdev, win_size, &drv_ctx->peer_mem); + if (ret) + goto err_assign_outbound; + + params.dev = &pdev->dev; + params.self_mem = &drv_ctx->self_mem; + params.peer_mem = &drv_ctx->peer_mem; + ret = pci_client_init(¶ms, &drv_ctx->pci_client_h); + if (ret) { + pr_err("(%s): pci_client_init() failed\n", + 
drv_ctx->drv_name); + goto err_pci_client; + } + pci_client_save_driver_ctx(drv_ctx->pci_client_h, drv_ctx); + pci_client_save_peer_cpu(drv_ctx->pci_client_h, NVCPU_ORIN); + + ret = comm_channel_init(drv_ctx, &drv_ctx->comm_channel_h); + if (ret) { + pr_err("(%s): Failed to initialize comm-channel\n", + drv_ctx->drv_name); + goto err_comm_init; + } + + ret = vmap_init(drv_ctx, &drv_ctx->vmap_h); + if (ret) { + pr_err("(%s): Failed to initialize vmap\n", + drv_ctx->drv_name); + goto err_vmap_init; + } + + ret = edma_module_init(drv_ctx); + if (ret) { + pr_err("(%s): Failed to initialize edma module\n", + drv_ctx->drv_name); + goto err_edma_init; + } + + ret = endpoints_setup(drv_ctx, &drv_ctx->endpoints_h); + if (ret) { + pr_err("(%s): Failed to initialize endpoints\n", + drv_ctx->drv_name); + goto err_endpoints_init; + } + + /* register for link status message from @DRV_MODE_EPF (PCIe EP).*/ + cb_ops.callback = link_msg_cb; + cb_ops.ctx = (void *)drv_ctx; + ret = comm_channel_register_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_LINK, &cb_ops); + if (ret) { + pr_err("(%s): Failed to register for link message\n", + drv_ctx->drv_name); + goto err_register_msg; + } + + /* + * share iova with @DRV_MODE_EPF for it's outbound translation. + * This must be send only after comm-channel, endpoint memory backing + * is created and mapped to self_mem. @DRV_MODE_EPF on seeing this + * message shall send link-up message over comm-channel and possibly + * applications can also start endpoint negotiation, therefore. + */ + reinit_completion(&drv_ctx->epc_ctx->epf_ready_cmpl); + msg.type = COMM_MSG_TYPE_BOOTSTRAP; + msg.u.bootstrap.iova = drv_ctx->self_mem.dma_handle; + msg.u.bootstrap.peer_cpu = NVCPU_ORIN; + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) { + pr_err("(%s): Failed to send comm bootstrap message\n", + drv_ctx->drv_name); + goto err_msg_send; + } + + /* wait for @DRV_MODE_EPF to acknowledge it's endpoints are created.*/ + timeout = + wait_for_completion_timeout(&drv_ctx->epc_ctx->epf_ready_cmpl, + msecs_to_jiffies(MAX_EPF_SETUP_TIMEOUT_MSEC)); + if (timeout == 0) { + ret = -ENOLINK; + pr_err("(%s): Timed-out waiting for nvscic2c-pcie-epf\n", + drv_ctx->drv_name); + goto err_epf_ready; + } + + pci_set_drvdata(pdev, drv_ctx); + return ret; + +err_epf_ready: +err_msg_send: + comm_channel_unregister_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_LINK); +err_register_msg: + endpoints_release(&drv_ctx->endpoints_h); + +err_endpoints_init: + edma_module_deinit(drv_ctx); + +err_edma_init: + vmap_deinit(&drv_ctx->vmap_h); + +err_vmap_init: + comm_channel_deinit(&drv_ctx->comm_channel_h); + +err_comm_init: + pci_client_deinit(&drv_ctx->pci_client_h); + +err_pci_client: + free_outbound_area(pdev, &drv_ctx->peer_mem); + +err_assign_outbound: + free_inbound_area(pdev, &drv_ctx->self_mem); + +err_alloc_inbound: + pci_release_region(pdev, 0); + +err_request_region: + pci_clear_master(pdev); + pci_disable_device(pdev); + +err_enable_device: + dt_release(&drv_ctx->drv_param); + +err_dt_parse: + pci_set_drvdata(pdev, NULL); + kfree_const(drv_ctx->drv_name); + kfree(drv_ctx); + return ret; +} + +MODULE_DEVICE_TABLE(pci, nvscic2c_pcie_epc_tbl); +static struct pci_driver nvscic2c_pcie_epc_driver = { + .name = DRIVER_NAME_EPC, + .id_table = nvscic2c_pcie_epc_tbl, + .probe = nvscic2c_pcie_epc_probe, + .remove = nvscic2c_pcie_epc_remove, + .shutdown = nvscic2c_pcie_epc_remove, +}; +module_pci_driver(nvscic2c_pcie_epc_driver); + +#define DRIVER_LICENSE "GPL v2" +#define 
DRIVER_DESCRIPTION "NVIDIA Chip-to-Chip transfer module for PCIeRP" +#define DRIVER_AUTHOR "Nvidia Corporation" +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/misc/nvscic2c-pcie/epf/module.c b/drivers/misc/nvscic2c-pcie/epf/module.c new file mode 100644 index 00000000..bcda9615 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/epf/module.c @@ -0,0 +1,849 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: epf: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "comm-channel.h" +#include "common.h" +#include "endpoint.h" +#include "iova-alloc.h" +#include "module.h" +#include "pci-client.h" +#include "vmap.h" + +static const struct pci_epf_device_id nvscic2c_pcie_epf_ids[] = { + { + .name = "nvscic2c_epf_22CB", + .driver_data = (kernel_ulong_t)PCI_DEVICE_ID_C2C_1, + }, + { + .name = "nvscic2c_epf_22CC", + .driver_data = (kernel_ulong_t)PCI_DEVICE_ID_C2C_2, + }, + { + .name = "nvscic2c_epf_22CD", + .driver_data = (kernel_ulong_t)PCI_DEVICE_ID_C2C_3, + }, + {}, +}; + +/* wrapper over tegra-pcie-edma init api. */ +static int +edma_module_init(struct driver_ctx_t *drv_ctx) +{ + u8 i = 0; + int ret = 0; + struct tegra_pcie_edma_init_info info = {0}; + + if (WARN_ON(!drv_ctx || !drv_ctx->drv_param.edma_np)) + return -EINVAL; + + memset(&info, 0x0, sizeof(info)); + info.np = drv_ctx->drv_param.edma_np; + info.edma_remote = NULL; + + for (i = 0; i < DMA_WR_CHNL_NUM; i++) { + info.tx[i].ch_type = EDMA_CHAN_XFER_ASYNC; + info.tx[i].num_descriptors = NUM_EDMA_DESC; + } + /*No use-case for RD channels.*/ + + drv_ctx->edma_h = tegra_pcie_edma_initialize(&info); + if (!drv_ctx->edma_h) + ret = -ENODEV; + + return ret; +} + +/* should stop any ongoing eDMA transfers.*/ +static void +edma_module_stop(struct driver_ctx_t *drv_ctx) +{ + if (!drv_ctx || !drv_ctx->edma_h) + return; + + tegra_pcie_edma_stop(drv_ctx->edma_h); +} + +/* should not have any ongoing eDMA transfers.*/ +static void +edma_module_deinit(struct driver_ctx_t *drv_ctx) +{ + if (!drv_ctx || !drv_ctx->edma_h) + return; + + tegra_pcie_edma_deinit(drv_ctx->edma_h); + drv_ctx->edma_h = NULL; +} + +static void +free_inbound_area(struct pci_epf *epf, struct dma_buff_t *self_mem) +{ + struct driver_ctx_t *drv_ctx = NULL; + + if (!epf || !self_mem) + return; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return; + + if (self_mem->dma_handle && drv_ctx->ivd_h) + iova_alloc_deinit(self_mem->dma_handle, self_mem->size, + &drv_ctx->ivd_h); + self_mem->dma_handle = 0x0; +} + +/* + * Allocate BAR backing iova region. Writes from peer SoC shall + * land in this region for it to be forwarded to system iommu to eventually + * land in BAR backing physical region. 
+ */ +static int +allocate_inbound_area(struct pci_epf *epf, size_t win_size, + struct dma_buff_t *self_mem) +{ + int ret = 0; + struct driver_ctx_t *drv_ctx = NULL; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return -ENOMEM; + + self_mem->size = win_size; + ret = iova_alloc_init(epf->epc->dev.parent, win_size, + &self_mem->dma_handle, &drv_ctx->ivd_h); + if (ret) { + pr_err("iova_domain_init() failed for size:(0x%lx)\n", + self_mem->size); + } + + return ret; +} + +static void +free_outbound_area(struct pci_epf *epf, struct pci_aper_t *peer_mem) +{ + if (!epf || !peer_mem || !peer_mem->pva) + return; + + pci_epc_mem_free_addr(epf->epc, peer_mem->aper, + peer_mem->pva, + peer_mem->size); + peer_mem->pva = NULL; +} + +/* + * Allocate outbound pcie aperture for CPU access towards PCIe RP. + * It is assumed that PCIe RP shall also allocate an equivalent size of inbound + * area as PCIe EP (it's BAR0 length). + */ +static int +allocate_outbound_area(struct pci_epf *epf, size_t win_size, + struct pci_aper_t *peer_mem) +{ + int ret = 0; + + peer_mem->size = win_size; + peer_mem->pva = pci_epc_mem_alloc_addr(epf->epc, + &peer_mem->aper, + peer_mem->size); + if (!peer_mem->pva) { + ret = -ENOMEM; + pr_err("pci_epc_mem_alloc_addr() fails for size:(0x%lx)\n", + peer_mem->size); + } + + return ret; +} + +static void +clear_inbound_translation(struct pci_epf *epf) +{ + struct pci_epf_bar *epf_bar = &epf->bar[BAR_0]; + + pci_epc_clear_bar(epf->epc, epf->func_no, PCIE_VFNO, epf_bar); + + /* no api to clear epf header.*/ +} + +static int +set_inbound_translation(struct pci_epf *epf) +{ + int ret = 0; + struct pci_epc *epc = epf->epc; + struct pci_epf_bar *epf_bar = &epf->bar[BAR_0]; + + ret = pci_epc_write_header(epc, epf->func_no, PCIE_VFNO, epf->header); + if (ret < 0) { + pr_err("Failed to write PCIe header\n"); + return ret; + } + + /* BAR:0 setttings are already done in _bind().*/ + ret = pci_epc_set_bar(epc, epf->func_no, PCIE_VFNO, epf_bar); + if (ret) { + pr_err("pci_epc_set_bar() failed\n"); + return ret; + } + + ret = pci_epc_set_msi(epc, epf->func_no, PCIE_VFNO, + epf->msi_interrupts); + if (ret) { + pr_err("pci_epc_set_msi() failed\n"); + return ret; + } + + return ret; +} + +static void +clear_outbound_translation(struct pci_epf *epf, struct pci_aper_t *peer_mem) +{ + return pci_epc_unmap_addr(epf->epc, epf->func_no, PCIE_VFNO, + peer_mem->aper); +} + +static int +set_outbound_translation(struct pci_epf *epf, struct pci_aper_t *peer_mem, + u64 peer_iova) +{ + return pci_epc_map_addr(epf->epc, epf->func_no, PCIE_VFNO, + peer_mem->aper, peer_iova, peer_mem->size); +} + +static void +edma_rx_desc_iova_send(struct driver_ctx_t *drv_ctx) +{ + int ret; + struct comm_msg msg = {0}; + + msg.type = COMM_MSG_TYPE_EDMA_RX_DESC_IOVA_RETURN; + msg.u.edma_rx_desc_iova.iova = pci_client_get_edma_rx_desc_iova(drv_ctx->pci_client_h); + + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) + pr_err("failed sending COMM_MSG_TYPE_EDMA_CH_DESC_IOVA_RETURN message\n"); +} + +/* Handle bootstrap message from @DRV_MODE_EPC. 
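+ *
+ * For clarity, the bring-up handshake this callback participates in is
+ * (illustrative summary of the code below and of the EPC probe path):
+ *
+ *   1. EPC -> EPF : COMM_MSG_TYPE_BOOTSTRAP carrying the EPC inbound iova
+ *                   and the peer CPU type.
+ *   2. EPF        : programs its outbound translation to that iova and, for
+ *                   an x86 peer, returns the eDMA rx descriptor iova.
+ *   3. EPF        : the scheduled init work sets up vmap, eDMA and the
+ *                   endpoints.
+ *   4. EPF -> EPC : COMM_MSG_TYPE_LINK (UP), completing epf_ready_cmpl.
+ *   5. both sides : pci_client_change_link_status(..., LINK_UP), after
+ *                   which applications may start endpoint negotiation.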
*/ +static void +bootstrap_msg_cb(void *data, void *ctx) +{ + int ret = 0; + struct driver_ctx_t *drv_ctx = NULL; + struct pci_epf *epf = (struct pci_epf *)ctx; + struct comm_msg *msg = (struct comm_msg *)data; + + if (WARN_ON(!msg || !epf)) + return; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return; + + /* + * setup outbound translation for CPU access from @DRV_MODE_EPF -> + * @DRV_MODE_EPC using the iova received from @DRV_MODE_EPC in + * bootstrap message. + * + * Must be done here, as return of the comm-channel message callback + * shall use CPU on @DRV_MODE_EPF to indicate message read. + */ + ret = set_outbound_translation(epf, &drv_ctx->peer_mem, + msg->u.bootstrap.iova); + if (ret) { + pr_err("Failed to set outbound (peer) memory translation\n"); + return; + } + + pci_client_save_peer_cpu(drv_ctx->pci_client_h, msg->u.bootstrap.peer_cpu); + + /* send edma rx desc iova to x86 peer(rp) */ + if (msg->u.bootstrap.peer_cpu == NVCPU_X86_64) + edma_rx_desc_iova_send(drv_ctx); + + /* + * schedule initialization of remaining interfaces as it could not + * be done in _notifier()(PCIe EP controller is still uninitialized + * then). Also abstraction: vmap registers with comm-channel, such + * callback registrations cannot happen while in the context of + * another comm-channel callback (this function). + */ + schedule_work(&drv_ctx->epf_ctx->initialization_work); +} + +/* + * tasklet/scheduled work for initialization of remaining interfaces + * (that which could not be done in _bind(), Reason: endpoint abstraction + * requires: + * - peer iova - not available unless bootstrap message. + * - edma cookie - cannot be done during _notifier() - interrupt context). + * ) + */ +static void +init_work(struct work_struct *work) +{ + int ret = 0; + struct comm_msg msg = {0}; + struct epf_context_t *epf_ctx = + container_of(work, struct epf_context_t, initialization_work); + struct driver_ctx_t *drv_ctx = (struct driver_ctx_t *)epf_ctx->drv_ctx; + + if (atomic_read(&drv_ctx->epf_ctx->epf_initialized)) { + pr_err("(%s): Already initialized\n", drv_ctx->drv_name); + goto err; + } + + ret = vmap_init(drv_ctx, &drv_ctx->vmap_h); + if (ret) { + pr_err("(%s): vmap_init() failed\n", drv_ctx->drv_name); + goto err; + } + + ret = edma_module_init(drv_ctx); + if (ret) { + pr_err("(%s): edma_module_init() failed\n", drv_ctx->drv_name); + goto err_edma_init; + } + + ret = endpoints_setup(drv_ctx, &drv_ctx->endpoints_h); + if (ret) { + pr_err("(%s): endpoints_setup() failed\n", drv_ctx->drv_name); + goto err_endpoint; + } + + /* + * this is an acknowledgment to @DRV_MODE_EPC in response to it's + * bootstrap message to indicate @DRV_MODE_EPF endpoints are ready. + */ + msg.type = COMM_MSG_TYPE_LINK; + msg.u.link.status = NVSCIC2C_PCIE_LINK_UP; + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) { + pr_err("(%s): Failed to send LINK(UP) message\n", + drv_ctx->drv_name); + goto err_msg_send; + } + + /* inidicate link-up to applications.*/ + atomic_set(&drv_ctx->epf_ctx->epf_initialized, 1); + pci_client_change_link_status(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_LINK_UP); + return; + +err_msg_send: + endpoints_release(&drv_ctx->endpoints_h); +err_endpoint: + edma_module_deinit(drv_ctx); +err_edma_init: + vmap_deinit(&drv_ctx->vmap_h); +err: + return; +} + +/* + * PCIe subsystem calls struct pci_epc_event_ops.core_init + * when PCIe hot-plug is initiated and before link trainig + * starts with PCIe RP SoC (before @DRV_MODE_EPC .probe() handler is invoked). 
+ * + * Because, CORE_INIT impacts link training timeout, it shall do only minimum + * required for @DRV_MODE_EPF for PCIe EP initialization. + * + * This is received in interrupt context. + */ +static int +nvscic2c_pcie_epf_core_init(struct pci_epf *epf) +{ + int ret = 0; + struct driver_ctx_t *drv_ctx = NULL; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return -EINVAL; + + if (atomic_read(&drv_ctx->epf_ctx->core_initialized)) { + pr_err("(%s): Received CORE_INIT callback again\n", + drv_ctx->drv_name); + return -EINVAL; + } + + ret = set_inbound_translation(epf); + if (ret) { + pr_err("(%s): set_inbound_translation() failed\n", + drv_ctx->drv_name); + return ret; + } + + atomic_set(&drv_ctx->epf_ctx->core_initialized, 1); + + return ret; +} + +/* Handle link message from @DRV_MODE_EPC. */ +static void +shutdown_msg_cb(void *data, void *ctx) +{ + struct driver_ctx_t *drv_ctx = NULL; + struct pci_epf *epf = (struct pci_epf *)ctx; + struct comm_msg *msg = (struct comm_msg *)data; + + if (WARN_ON(!msg || !epf)) + return; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return; + + if (!atomic_read(&drv_ctx->epf_ctx->epf_initialized)) { + pr_err("(%s): Unexpected shutdown msg from nvscic2c-pcie-epc\n", + drv_ctx->drv_name); + return; + } + + /* schedule deinitialization of epf interfaces. */ + schedule_work(&drv_ctx->epf_ctx->deinitialization_work); +} + +/* + * tasklet/scheduled work for de-initialization of @DRV_MODE_EPF(this) + * interfaces. It is done in a tasklet for the following scenario: + * @DRV_MODE_EPC can get unloaded(rmmod) and reinserted(insmod) while the + * PCIe link with PCIe EP SoC still active. So before we receive + * bootstrap message again when @DRV_MODE_EPC is reinserted, we would need + * to clean-up all abstractions before they can be reinit again. + * + * In case of abnormal shutdown of PCIe RP SoC, @DRV_MODE_EPF shall receive + * CORE_DEINIT directly from PCIe sub-system without any comm-message from + * @DRV_MODE_EPC. + */ +static void +deinit_work(struct work_struct *work) +{ + int ret = 0; + struct comm_msg msg = {0}; + struct epf_context_t *epf_ctx = + container_of(work, struct epf_context_t, deinitialization_work); + struct driver_ctx_t *drv_ctx = (struct driver_ctx_t *)epf_ctx->drv_ctx; + + if (!atomic_read(&drv_ctx->epf_ctx->epf_initialized)) + return; + + /* local apps can stop processing if they see this.*/ + pci_client_change_link_status(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_LINK_DOWN); + /* + * stop ongoing and pending edma xfers, this edma module shall not + * accept new xfer submissions after this. + */ + edma_module_stop(drv_ctx); + + /* wait for @DRV_MODE_EPF (local)endpoints to close. */ + ret = endpoints_waitfor_close(drv_ctx->endpoints_h); + if (ret) { + pr_err("(%s): Error waiting for endpoints to close\n", + drv_ctx->drv_name); + } + /* Even in case of error, continue to deinit - cannot be recovered.*/ + + /* + * Acknowledge @DRV_MODE_EPC that @DRV_MODE_EPF(this) endpoints are + * closed. If PCIe RP SoC went abnormally away(halt/reset/kernel oops) + * signal anyway (sending signal will not cause local SoC fault when + * PCIe RP SoC (@DRV_MODE_EPC) went abnormally away). 
+ */ + msg.type = COMM_MSG_TYPE_LINK; + msg.u.link.status = NVSCIC2C_PCIE_LINK_DOWN; + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) + pr_err("(%s): Failed to send LINK (DOWN) message\n", + drv_ctx->drv_name); + + endpoints_release(&drv_ctx->endpoints_h); + edma_module_deinit(drv_ctx); + vmap_deinit(&drv_ctx->vmap_h); + clear_outbound_translation(drv_ctx->epf_ctx->epf, &drv_ctx->peer_mem); + atomic_set(&drv_ctx->epf_ctx->epf_initialized, 0); +} + +/* + * Graceful shutdown: PCIe subsystem calls struct pci_epc_event_ops.core_deinit + * when @DRV_MODE_EPC .remove() or .shutdown() handlers are completed/exited. + * Abnormal shutdown (when PCIe RP SoC - gets halted, or it's kernel oops): + * PCIe subsystem also struct pci_epc_event_ops.core_deinit but + * @DRV_MODE_EPC would have already gone then by the time + * struct pci_epc_event_ops.core_deinit is called. + */ +static int +nvscic2c_pcie_epf_core_deinit(struct pci_epf *epf) +{ + struct driver_ctx_t *drv_ctx = NULL; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return -EINVAL; + + if (atomic_read(&drv_ctx->epf_ctx->core_initialized)) { + /* + * in case of PCIe RP SoC abnormal shutdown, comm-channel + * shutdown message from @DRV_MODE_EPC won't come and + * therefore scheduling the deinit work here is required + * If its already scheduled, it won't be scheduled again. + * Wait for deinit work to complete in either case. + */ + schedule_work(&drv_ctx->epf_ctx->deinitialization_work); + flush_work(&drv_ctx->epf_ctx->deinitialization_work); + + clear_inbound_translation(epf); + atomic_set(&drv_ctx->epf_ctx->core_initialized, 0); + } + wake_up_interruptible_all(&drv_ctx->epf_ctx->core_initialized_waitq); + + return 0; +} + +/* Handle link message from @DRV_MODE_EPC. */ +static void +link_msg_cb(void *data, void *ctx) +{ + struct pci_epf *epf = (struct pci_epf *)ctx; + struct comm_msg *msg = (struct comm_msg *)data; + struct driver_ctx_t *drv_ctx = (struct driver_ctx_t *)ctx; + + if (WARN_ON(!msg || !epf)) + return; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return; + + if (msg->u.link.status != NVSCIC2C_PCIE_LINK_DOWN) { + pr_err("(%s): spurious link message received from EPC\n", + drv_ctx->drv_name); + return; + } + + /* inidicate link status to application.*/ + pci_client_change_link_status(drv_ctx->pci_client_h, + msg->u.link.status); +} + +/* + * ASSUMPTION: applications on and @DRV_MODE_EPC(PCIe RP) must have stopped + * communicating with application and @DRV_MODE_EPF (this) before this point. 
+ */ +static void +nvscic2c_pcie_epf_unbind(struct pci_epf *epf) +{ + long ret = 0; + struct driver_ctx_t *drv_ctx = NULL; + + if (!epf) + return; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return; + + /* timeout should be higher than that of endpoints to close.*/ + ret = wait_event_interruptible + (drv_ctx->epf_ctx->core_initialized_waitq, + !(atomic_read(&drv_ctx->epf_ctx->core_initialized))); + if (ret == -ERESTARTSYS) + pr_err("(%s): Interrupted waiting for CORE_DEINIT to complete\n", + drv_ctx->drv_name); + + comm_channel_unregister_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_SHUTDOWN); + comm_channel_unregister_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_BOOTSTRAP); + comm_channel_deinit(&drv_ctx->comm_channel_h); + pci_client_deinit(&drv_ctx->pci_client_h); + free_outbound_area(epf, &drv_ctx->peer_mem); + free_inbound_area(epf, &drv_ctx->self_mem); +} + +static int +nvscic2c_pcie_epf_bind(struct pci_epf *epf) +{ + int ret = 0; + size_t win_size = 0; + struct pci_epf_bar *epf_bar = NULL; + struct driver_ctx_t *drv_ctx = NULL; + struct pci_client_params params = {0}; + struct callback_ops cb_ops = {0}; + + if (!epf) + return -EINVAL; + + drv_ctx = epf_get_drvdata(epf); + if (!drv_ctx) + return -EINVAL; + + /* + * device-tree node has edma phandle, user must bind + * the function to the same pcie controller. + */ + if (drv_ctx->drv_param.edma_np != epf->epc->dev.parent->of_node) { + pr_err("epf:(%s) is not bounded to correct controller\n", + epf->name); + return -EINVAL; + } + + win_size = drv_ctx->drv_param.bar_win_size; + ret = allocate_inbound_area(epf, win_size, &drv_ctx->self_mem); + if (ret) + goto err_alloc_inbound; + + ret = allocate_outbound_area(epf, win_size, &drv_ctx->peer_mem); + if (ret) + goto err_alloc_outbound; + + params.dev = epf->epc->dev.parent; + params.self_mem = &drv_ctx->self_mem; + params.peer_mem = &drv_ctx->peer_mem; + ret = pci_client_init(¶ms, &drv_ctx->pci_client_h); + if (ret) { + pr_err("pci_client_init() failed\n"); + goto err_pci_client; + } + pci_client_save_driver_ctx(drv_ctx->pci_client_h, drv_ctx); + /* + * setup of comm-channel must be done in bind() for @DRV_MODE_EPC + * to share bootstrap message. register for message from @DRV_MODE_EPC + * (PCIe RP). + */ + ret = comm_channel_init(drv_ctx, &drv_ctx->comm_channel_h); + if (ret) { + pr_err("Failed to initialize comm-channel\n"); + goto err_comm_init; + } + + /* register for bootstrap message from @DRV_MODE_EPC (PCIe RP).*/ + cb_ops.callback = bootstrap_msg_cb; + cb_ops.ctx = (void *)epf; + ret = comm_channel_register_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_BOOTSTRAP, &cb_ops); + if (ret) { + pr_err("Failed to register for bootstrap message from RP\n"); + goto err_register_msg; + } + + /* register for shutdown message from @DRV_MODE_EPC (PCIe RP).*/ + memset(&cb_ops, 0x0, sizeof(cb_ops)); + cb_ops.callback = shutdown_msg_cb; + cb_ops.ctx = (void *)epf; + ret = comm_channel_register_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_SHUTDOWN, &cb_ops); + if (ret) { + pr_err("Failed to register for shutdown message from RP\n"); + goto err_register_msg; + } + + /* register for link message from @DRV_MODE_EPC (PCIe RP).*/ + memset(&cb_ops, 0x0, sizeof(cb_ops)); + cb_ops.callback = link_msg_cb; + cb_ops.ctx = (void *)epf; + ret = comm_channel_register_msg_cb(drv_ctx->comm_channel_h, + COMM_MSG_TYPE_LINK, &cb_ops); + if (ret) { + pr_err("Failed to register for link message from RP\n"); + goto err_register_msg; + } + + /* BAR:0 settings. 
- done here to save time in CORE_INIT.*/ + epf_bar = &epf->bar[BAR_0]; + epf_bar->phys_addr = drv_ctx->self_mem.dma_handle; + epf_bar->size = drv_ctx->self_mem.size; + epf_bar->barno = BAR_0; + epf_bar->flags |= PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH; + + return ret; + +err_register_msg: + comm_channel_deinit(&drv_ctx->comm_channel_h); + +err_comm_init: + pci_client_deinit(&drv_ctx->pci_client_h); + +err_pci_client: + free_outbound_area(epf, &drv_ctx->peer_mem); + +err_alloc_outbound: + free_inbound_area(epf, &drv_ctx->self_mem); + +err_alloc_inbound: + return ret; +} + +static void +nvscic2c_pcie_epf_remove(struct pci_epf *epf) +{ + struct driver_ctx_t *drv_ctx = epf_get_drvdata(epf); + + if (!drv_ctx) + return; + + cancel_work_sync(&drv_ctx->epf_ctx->deinitialization_work); + cancel_work_sync(&drv_ctx->epf_ctx->initialization_work); + epf->header = NULL; + kfree(drv_ctx->epf_ctx); + + dt_release(&drv_ctx->drv_param); + + epf_set_drvdata(epf, NULL); + kfree_const(drv_ctx->drv_name); + kfree(drv_ctx); +} + +static kernel_ulong_t +get_driverdata(const struct pci_epf_device_id *id, + const struct pci_epf *epf) +{ + while (id->name[0]) { + if (strcmp(epf->name, id->name) == 0) + return id->driver_data; + id++; + } + + return 0; +} + +static const struct pci_epc_event_ops nvscic2c_event_ops = { + .core_init = nvscic2c_pcie_epf_core_init, + .core_deinit = nvscic2c_pcie_epf_core_deinit, +}; + +static int +nvscic2c_pcie_epf_probe(struct pci_epf *epf) +{ + int ret = 0; + char *name = NULL; + u32 pci_dev_id = 0x0; + struct driver_ctx_t *drv_ctx = NULL; + struct epf_context_t *epf_ctx = NULL; + + /* get pci device id from epf name.*/ + pci_dev_id = (u32)get_driverdata(nvscic2c_pcie_epf_ids, epf); + if (!pci_dev_id) + return -EINVAL; + + /* allocate module context.*/ + drv_ctx = kzalloc(sizeof(*drv_ctx), GFP_KERNEL); + if (WARN_ON(!drv_ctx)) + return -ENOMEM; + + name = kasprintf(GFP_KERNEL, "%s-%x", DRIVER_NAME_EPF, pci_dev_id); + if (WARN_ON(!name)) { + kfree(drv_ctx); + return -ENOMEM; + } + + drv_ctx->drv_mode = DRV_MODE_EPF; + drv_ctx->drv_name = name; + epf_set_drvdata(epf, drv_ctx); + + /* check for the device tree node against this Id, must be only one.*/ + ret = dt_parse(pci_dev_id, drv_ctx->drv_mode, &drv_ctx->drv_param); + if (ret) + goto err_dt_parse; + + /* allocate nvscic2c-pcie epf only context.*/ + epf_ctx = kzalloc(sizeof(*epf_ctx), GFP_KERNEL); + if (WARN_ON(!epf_ctx)) { + ret = -ENOMEM; + goto err_alloc_epf_ctx; + } + drv_ctx->epf_ctx = epf_ctx; + epf_ctx->header.vendorid = PCI_VENDOR_ID_NVIDIA; + epf_ctx->header.deviceid = pci_dev_id; + epf_ctx->header.baseclass_code = PCI_BASE_CLASS_COMMUNICATION; + epf_ctx->header.interrupt_pin = PCI_INTERRUPT_INTA; + + epf->event_ops = &nvscic2c_event_ops; + epf->header = &epf_ctx->header; + + /* to initialize NvSciC2cPcie interfaces on bootstrap msg.*/ + epf_ctx->drv_ctx = drv_ctx; + epf_ctx->epf = epf; + INIT_WORK(&epf_ctx->initialization_work, init_work); + INIT_WORK(&epf_ctx->deinitialization_work, deinit_work); + + /* to synchronize deinit, unbind.*/ + atomic_set(&epf_ctx->core_initialized, 0); + atomic_set(&drv_ctx->epf_ctx->epf_initialized, 0); + init_waitqueue_head(&epf_ctx->core_initialized_waitq); + + return ret; + +err_alloc_epf_ctx: + dt_release(&drv_ctx->drv_param); + +err_dt_parse: + epf_set_drvdata(epf, NULL); + kfree_const(drv_ctx->drv_name); + kfree(drv_ctx); + + return ret; +} + +static struct pci_epf_ops ops = { + .unbind = nvscic2c_pcie_epf_unbind, + .bind = 
nvscic2c_pcie_epf_bind, +}; + +static struct pci_epf_driver nvscic2c_pcie_epf_driver = { + .driver.name = DRIVER_NAME_EPF, + .probe = nvscic2c_pcie_epf_probe, + .remove = nvscic2c_pcie_epf_remove, + .id_table = nvscic2c_pcie_epf_ids, + .ops = &ops, + .owner = THIS_MODULE, +}; + +static int +__init nvscic2c_pcie_epf_init(void) +{ + return pci_epf_register_driver(&nvscic2c_pcie_epf_driver); +} +module_init(nvscic2c_pcie_epf_init); + +static void +__exit nvscic2c_pcie_epf_deinit(void) +{ + pci_epf_unregister_driver(&nvscic2c_pcie_epf_driver); +} +module_exit(nvscic2c_pcie_epf_deinit); + +#define DRIVER_LICENSE "GPL v2" +#define DRIVER_DESCRIPTION "NVIDIA Chip-to-Chip transfer module for PCIeEP" +#define DRIVER_AUTHOR "Nvidia Corporation" +MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_LICENSE(DRIVER_LICENSE); +MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/misc/nvscic2c-pcie/iova-alloc.c b/drivers/misc/nvscic2c-pcie/iova-alloc.c new file mode 100644 index 00000000..ad937892 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/iova-alloc.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#define pr_fmt(fmt) "nvscic2c-pcie: iova-alloc: " fmt +#include +#include +#include +#include + +#include "iova-alloc.h" + +struct iova_alloc_domain_t { + struct device *dev; + struct iommu_domain *domain; + struct iova_domain iovad; +}; + +static int +iovad_init(struct device *dev, struct iova_alloc_domain_t **ivd_h) +{ + int ret = 0; + dma_addr_t start = 0; + unsigned long order = 0; + struct iova_alloc_domain_t *ivd_ctx = NULL; + struct iommu_domain_geometry *geometry = NULL; + + ivd_ctx = kzalloc(sizeof(*ivd_ctx), GFP_KERNEL); + if (WARN_ON(!ivd_ctx)) + return -ENOMEM; + + ivd_ctx->dev = dev; + ret = iova_cache_get(); + if (ret < 0) + return ret; + + ivd_ctx->domain = iommu_get_domain_for_dev(dev); + if (!ivd_ctx->domain) { + ret = -EINVAL; + pr_err("iommu_get_domain_for_dev() failed.\n"); + goto put_cache; + } + + geometry = &ivd_ctx->domain->geometry; + start = geometry->aperture_start & dev->coherent_dma_mask; + order = __ffs(ivd_ctx->domain->pgsize_bitmap); + pr_debug("Order of address allocation for IOVA domain: %lu\n", order); + init_iova_domain(&ivd_ctx->iovad, 1UL << order, start >> order); + + *ivd_h = ivd_ctx; + + return ret; +put_cache: + iova_cache_put(); + return ret; +} + +static void +iovad_deinit(struct iova_alloc_domain_t **ivd_h) +{ + struct iova_alloc_domain_t *ivd_ctx = NULL; + + ivd_ctx = (struct iova_alloc_domain_t *)(*ivd_h); + put_iova_domain(&ivd_ctx->iovad); + iova_cache_put(); + kfree(ivd_ctx); + *ivd_h = NULL; +} + +int +iova_alloc_init(struct device *dev, size_t size, dma_addr_t *dma_handle, + struct iova_alloc_domain_t **ivd_h) +{ + int ret = 0; + unsigned long shift = 0U; + unsigned long iova = 0U; + unsigned long iova_len = 0U; + dma_addr_t dma_limit = 0x0; + struct iova_alloc_domain_t *ivd_ctx = NULL; + struct iommu_domain_geometry *geometry = NULL; + + if (WARN_ON(!dev || !dma_handle || !ivd_h || *ivd_h)) + return -EINVAL; + + ret = 
iovad_init(dev, &ivd_ctx); + if (ret < 0) { + pr_err("Failed in allocating IOVA domain: %d\n", ret); + return ret; + } + + geometry = &ivd_ctx->domain->geometry; + dma_limit = ivd_ctx->dev->coherent_dma_mask; + shift = iova_shift(&ivd_ctx->iovad); + iova_len = size >> shift; + + /* Recommendation is to allocate in power of 2.*/ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + + if (*ivd_ctx->dev->dma_mask) + dma_limit &= *ivd_ctx->dev->dma_mask; + + if (geometry->force_aperture) + dma_limit = min(dma_limit, geometry->aperture_end); + + /* Try to get PCI devices a SAC address */ + if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(ivd_ctx->dev)) + iova = alloc_iova_fast(&ivd_ctx->iovad, iova_len, + DMA_BIT_MASK(32) >> shift, false); + if (!iova) + iova = alloc_iova_fast(&ivd_ctx->iovad, iova_len, + dma_limit >> shift, true); + + *dma_handle = (dma_addr_t)iova << shift; + *ivd_h = ivd_ctx; + + return ret; +} + +void +iova_alloc_deinit(dma_addr_t dma_handle, size_t size, + struct iova_alloc_domain_t **ivd_h) +{ + struct iova_domain *iovad = NULL; + struct iova_alloc_domain_t *ivd_ctx = NULL; + + if (!ivd_h || !(*ivd_h) || !dma_handle) + return; + + ivd_ctx = *ivd_h; + iovad = &ivd_ctx->iovad; + + free_iova_fast(iovad, iova_pfn(iovad, dma_handle), + size >> iova_shift(iovad)); + + iovad_deinit(ivd_h); +} diff --git a/drivers/misc/nvscic2c-pcie/iova-alloc.h b/drivers/misc/nvscic2c-pcie/iova-alloc.h new file mode 100644 index 00000000..2812bf23 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/iova-alloc.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#ifndef __IOVA_ALLOC_H__ +#define __IOVA_ALLOC_H__ + +#include +#include + +struct iova_alloc_domain_t; + +/* + * iova_alloc_init + * + * With NvSciC2c usecase IOVA range needs to be allocated without + * physical backing. Existing DMA API framework does not allow this. + * Hence allocate new IOVA domain to allocate IOVA range. + * + * Allocate IOVA range using new IOVA domain. + * Use this IOVA range in iommu_map() with existing IOMMU domain. + */ +int +iova_alloc_init(struct device *dev, size_t size, dma_addr_t *dma_handle, + struct iova_alloc_domain_t **ivd_h); + +/* + * iova_alloc_deinit + * + * Free IOVA range allocated using iova_alloc_init. + * Client needs to make sure that if physical mapping was created + * then it is released before calling iova_alloc_deinit. + + * Release IOVA domain allocated in iova_alloc_init. + */ +void +iova_alloc_deinit(dma_addr_t dma_handle, size_t size, + struct iova_alloc_domain_t **ivd_h); +#endif //__IOVA_ALLOC_H__ diff --git a/drivers/misc/nvscic2c-pcie/iova-mngr.c b/drivers/misc/nvscic2c-pcie/iova-mngr.c new file mode 100644 index 00000000..c6abe044 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/iova-mngr.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: iova-mgr: " fmt + +#include +#include +#include +#include +#include + +#include "common.h" +#include "iova-mngr.h" + +/* + * INTERNAL DataStructure that define a single IOVA block/chunk + * in a pool of IOVA region managed by IOVA manager. + * + * IOVA manager chunks entire IOVA space into these blocks/chunks. + * + * The chunk/block is also a node for linking to previous and next + * nodes of a circular doubly linked list - free or reserved. + */ +struct block_t { + /* for management of this chunk in either avail or reserve lists.*/ + struct list_head node; + + /* block address.*/ + u64 address; + + /* block size.*/ + size_t size; +}; + +/* + * INTERNAL datastructure for IOVA space manager. + * + * IOVA space manager would fragment and manage the IOVA region + * using two circular doubly linked lists - reserved list and + * free list. These lists contain blocks/chunks reserved + * or free for use by clients (callers) from the overall + * IOVA region the IOVA manager was configured with. + */ +struct mngr_ctx_t { + /* + * For debug purpose only, all other usage prohibited. + * In the event there are multiple iova_managers within + * an LKM instance, name helps in identification. + */ + char name[NAME_MAX]; + + /* + * Circular doubly linked list of blocks indicating + * available/free IOVA space(s). When IOVA manager is + * initialised all of the IOVA space is marked as available + * to begin with. + */ + struct list_head *free_list; + + /* + * Book-keeping of the user IOVA blocks in a circular double + * linked list. + */ + struct list_head *reserved_list; + + /* Ensuring reserve, free and the list operations are serialized.*/ + struct mutex lock; + + /* base address memory manager is configured with. */ + u64 base_address; +}; + +/* + * Reserves a block from the free IOVA regions. Once reserved, the block + * is marked reserved and appended in the reserved list (no ordering + * required and trying to do so shall increase the time) + */ +int +iova_mngr_block_reserve(void *mngr_handle, size_t size, + u64 *address, size_t *offset, + void **block_handle) +{ + struct mngr_ctx_t *ctx = (struct mngr_ctx_t *)(mngr_handle); + struct block_t *reserve = NULL, *curr = NULL, *best = NULL; + int ret = 0; + + if (WARN_ON(!ctx || *block_handle || !size)) + return -EINVAL; + + mutex_lock(&ctx->lock); + + /* if there are no free blocks to reserve. */ + if (list_empty(ctx->free_list)) { + ret = -ENOMEM; + pr_err("(%s): No memory available to reserve block of size:(%lu)\n", + ctx->name, size); + goto err; + } + + /* find the best of all free bocks to reserve.*/ + list_for_each_entry(curr, ctx->free_list, node) { + if (curr->size >= size) { + if (!best) + best = curr; + else if (curr->size < best->size) + best = curr; + } + } + + /* if there isn't any free block of requested size. 
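+ * For example, free blocks of 8K and 16K cannot satisfy a 24K request even
+ * though 24K is free in total: a single free block of at least the requested
+ * size is needed, blocks are never joined here (adjacent blocks are only
+ * merged on release).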
*/ + if (!best) { + ret = -ENOMEM; + pr_err("(%s): No enough mem available to reserve block sz:(%lu)\n", + ctx->name, size); + goto err; + } else { + struct block_t *found = NULL; + + /* perfect fit.*/ + if (best->size == size) { + list_del(&best->node); + list_add_tail(&best->node, ctx->reserved_list); + found = best; + } else { + /* chunk out a new block, adjust the free block.*/ + reserve = kzalloc(sizeof(*reserve), GFP_KERNEL); + if (WARN_ON(!reserve)) { + ret = -ENOMEM; + goto err; + } + reserve->address = best->address; + reserve->size = size; + best->address += size; + best->size -= size; + list_add_tail(&reserve->node, ctx->reserved_list); + found = reserve; + } + *block_handle = (void *)(found); + + if (address) + *address = found->address; + if (offset) + *offset = (found->address - ctx->base_address); + } +err: + mutex_unlock(&ctx->lock); + return ret; +} + +/* + * Release an already reserved IOVA block/chunk by the caller back to + * free list. + */ +int +iova_mngr_block_release(void *mngr_handle, void **block_handle) +{ + struct mngr_ctx_t *ctx = (struct mngr_ctx_t *)(mngr_handle); + struct block_t *release = (struct block_t *)(*block_handle); + struct block_t *curr = NULL, *prev = NULL; + bool done = false; + int ret = 0; + + if (!ctx || !release) + return -EINVAL; + + mutex_lock(&ctx->lock); + + list_for_each_entry(curr, ctx->free_list, node) { + if (release->address < curr->address) { + /* if the immediate next node is available for merge.*/ + if (curr->address == release->address + release->size) { + curr->address = release->address; + curr->size += release->size; + list_del(&release->node); + kfree(release); + /* + * if the immediate previous node is also + * available for merge. + */ + if ((prev) && + ((prev->address + prev->size) + == curr->address)) { + prev->size += curr->size; + list_del(&curr->node); + kfree(curr); + } + } else if ((prev) && + ((prev->address + prev->size) + == release->address)) { + /* + * if only the immediate prev node is available + */ + prev->size += release->size; + list_del(&release->node); + kfree(release); + } else { + /* + * cannot be merged with either the immediate + * prev or the immediate next node. Add it in the + * free list before the current node. + */ + list_del(&release->node); + list_add_tail(&release->node, &curr->node); + } + done = true; + break; + } + prev = curr; + } + + /* + * Even if after traversing each entry in list, we could not + * add the block to be released back in free list, because: + */ + if (!done) { + if (!list_empty(ctx->free_list)) { + /* + * The block to be freed has the highest order + * address of all the existing blocks in free list. + */ + struct block_t *last = + list_last_entry(ctx->free_list, struct block_t, node); + if ((last->address + last->size) == release->address) { + /* can be merged with last node of list.*/ + last->size += release->size; + list_del(&release->node); + kfree(release); + } else { + /* cannot be merged, add as the last node.*/ + list_del(&release->node); + list_add_tail(&release->node, ctx->free_list); + } + } else { + /* free list was empty.*/ + list_del(&release->node); + list_add_tail(&release->node, ctx->free_list); + } + } + *block_handle = NULL; + + mutex_unlock(&ctx->lock); + return ret; +} + +/* + * iova_mngr_print + * + * DEBUG only. + * + * Helper function to print all the reserved and free blocks with + * their names, size and start address. 
+ */ +void +iova_mngr_print(void *mngr_handle) +{ + struct mngr_ctx_t *ctx = (struct mngr_ctx_t *)(mngr_handle); + struct block_t *block = NULL; + + if (ctx) { + mutex_lock(&ctx->lock); + pr_debug("(%s): Reserved\n", ctx->name); + list_for_each_entry(block, ctx->reserved_list, node) { + pr_debug("\t\t (%s): address = 0x%pa[p], size = 0x%lx\n", + ctx->name, &block->address, block->size); + } + pr_debug("(%s): Free\n", ctx->name); + list_for_each_entry(block, ctx->free_list, node) { + pr_debug("\t\t (%s): address = 0x%pa[p], size = 0x%lx\n", + ctx->name, &block->address, block->size); + } + mutex_unlock(&ctx->lock); + } +} + +/* + * Initialises the IOVA space manager with the base address + size + * provided. IOVA manager would use two lists for book-keeping reserved + * memory blocks and free memory blocks. + * + * When initialised all of the IOVA region: base_address + size is free. + */ +int +iova_mngr_init(char *name, u64 base_address, size_t size, void **mngr_handle) +{ + int ret = 0; + struct block_t *block = NULL; + struct mngr_ctx_t *ctx = NULL; + + if (WARN_ON(!base_address || !size || + !mngr_handle || *mngr_handle || !name)) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) { + ret = -ENOMEM; + goto err; + } + + ctx->free_list = kzalloc(sizeof(*ctx->free_list), GFP_KERNEL); + if (WARN_ON(!ctx->free_list)) { + ret = -ENOMEM; + goto err; + } + + ctx->reserved_list = kzalloc(sizeof(*ctx->reserved_list), GFP_KERNEL); + if (WARN_ON(!ctx->reserved_list)) { + ret = -ENOMEM; + goto err; + } + + if (strlen(name) > (NAME_MAX - 1)) { + ret = -EINVAL; + pr_err("name: (%s) long, max char:(%u)\n", name, (NAME_MAX - 1)); + goto err; + } + strcpy(ctx->name, name); + INIT_LIST_HEAD(ctx->reserved_list); + INIT_LIST_HEAD(ctx->free_list); + mutex_init(&ctx->lock); + ctx->base_address = base_address; + + /* add the base_addrss+size as one whole free block.*/ + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (WARN_ON(!block)) { + ret = -ENOMEM; + goto err; + } + block->address = base_address; + block->size = size; + list_add(&block->node, ctx->free_list); + + *mngr_handle = ctx; + return ret; +err: + iova_mngr_deinit((void **)(&ctx)); + return ret; +} + +/* + * iova_mngr_deinit + * + * deinitialize the IOVA space manager. Any blocks unreturned from the client + * (caller) shall become dangling. 
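+ *
+ * Typical lifetime, as used by pci-client.c (sketch only; "base"/"size" are
+ * caller-provided and error handling is omitted):
+ *
+ *   void *mngr = NULL, *blk = NULL;
+ *   u64 iova = 0;
+ *   size_t offset = 0;
+ *
+ *   iova_mngr_init("self_mem", base, size, &mngr);
+ *   iova_mngr_block_reserve(mngr, SZ_64K, &iova, &offset, &blk);
+ *   ... use [iova, iova + SZ_64K) ...
+ *   iova_mngr_block_release(mngr, &blk);
+ *   iova_mngr_deinit(&mngr);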
+ */ +void +iova_mngr_deinit(void **mngr_handle) +{ + struct block_t *block = NULL; + struct list_head *curr = NULL, *next = NULL; + struct mngr_ctx_t *ctx = (struct mngr_ctx_t *)(*mngr_handle); + + if (ctx) { + /* debug only to ensure, lists do not have dangling data left.*/ + iova_mngr_print(*mngr_handle); + + /* ideally, all blocks should have returned before this.*/ + if (!list_empty(ctx->reserved_list)) { + list_for_each_safe(curr, next, ctx->reserved_list) { + block = list_entry(curr, struct block_t, node); + iova_mngr_block_release(*mngr_handle, + (void **)(&block)); + } + } + + /* ideally, just one whole free block should remain as free.*/ + list_for_each_safe(curr, next, ctx->free_list) { + block = list_entry(curr, struct block_t, node); + list_del(&block->node); + kfree(block); + } + + mutex_destroy(&ctx->lock); + kfree(ctx->reserved_list); + kfree(ctx->free_list); + kfree(ctx); + *mngr_handle = NULL; + } +} diff --git a/drivers/misc/nvscic2c-pcie/iova-mngr.h b/drivers/misc/nvscic2c-pcie/iova-mngr.h new file mode 100644 index 00000000..a174be5d --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/iova-mngr.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __IOVA_MNGR_H__ +#define __IOVA_MNGR_H__ + +#include + +/* + * iova_mngr_block_reserve + * + * Reserves a block from the free IOVA regions. Once reserved, the block + * is marked reserved and appended in the reserved list. Use + * iova_mngr_block_get_address to fetch the address of the block reserved. + */ +int +iova_mngr_block_reserve(void *mngr_handle, size_t size, + u64 *address, size_t *offset, + void **block_handle); + +/* + * iova_mngr_block_release + * + * Release an already reserved IOVA block/chunk by the caller back to + * free list. + */ +int +iova_mngr_block_release(void *mngr_handle, void **block_handle); + +/* + * iova_mngr_print + * + * DEBUG only. + * + * Helper function to print all the reserved and free blocks with + * their names, size and start address. + */ +void iova_mngr_print(void *handle); + +/* + * iova_mngr_init + * + * Initialises the IOVA space manager with the base address + size + * provided. IOVA manager would use two lists for book-keeping reserved + * memory blocks and free memory blocks. + * + * When initialised all of the IOVA region: base_address + size is free. + */ +int +iova_mngr_init(char *name, u64 base_address, size_t size, void **mngr_handle); + +/* + * iova_mngr_deinit + * + * deinitialize the IOVA space manager. Any blocks unreturned from the client + * (caller) shall become dangling. + */ +void +iova_mngr_deinit(void **handle); + +#endif //__IOVA_MNGR_H__ diff --git a/drivers/misc/nvscic2c-pcie/module.h b/drivers/misc/nvscic2c-pcie/module.h new file mode 100644 index 00000000..3c9e4953 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/module.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* + * Internal to nvscic2c-pcie module. This file is not supposed to be included + * by any other external modules. + */ +#ifndef __MODULE_H__ +#define __MODULE_H__ + +#include +#include +#include +#include +#include + +#include "common.h" + +/* forward declaration.*/ +struct device_node; +struct platform_device; + +/* + * Parameters for the nvscic2c-pcie module and it's endpoints. + * + * These are read-only for the rest of the nvscic2c-pcie module. + */ +struct driver_param_t { + /* This is (nvscic2c-pcie) device tree node as found in device tree.*/ + struct device_node *of_node; + + /* driver mode as parsed from compatible string in device-tree.*/ + enum drv_mode_t drv_mode; + + /* nvscic2c-pcie device tree node.*/ + struct platform_device *pdev; + + /* host1x device node and pdev via phandle.*/ + struct device_node *host1x_np; + struct platform_device *host1x_pdev; + + /* device node of pcie controller via phandle.*/ + struct device_node *edma_np; + + /* pci endpoint device id.*/ + u32 pci_dev_id; + + /* bar window size. - applicable only for epf.*/ + u32 bar_win_size; + + /* node information, Board+SoC Id.*/ + struct node_info_t local_node; + struct node_info_t peer_node; + + /* + * Properties that each endpoint shall be configured with. + * These properties are populated from device tree node. + */ + u8 nr_endpoint; + struct endpoint_prop_t { + /* Endpoint ID as populated from device tree node. */ + u8 id; + + /* + * Human readable name of the endpoint - char dev node shall be + * instanced using this name. + */ + char name[NAME_MAX]; + + /* Frames and per frame size.*/ + u8 nframes; + u32 frame_sz; + } endpoint_props[MAX_ENDPOINTS]; +}; + +/* nvscic2c-pcie epf specific context. */ +struct epf_context_t { + bool notifier_registered; + struct pci_epf_header header; + struct pci_epf *epf; + void *drv_ctx; + struct work_struct initialization_work; + struct work_struct deinitialization_work; + atomic_t core_initialized; + atomic_t epf_initialized; + wait_queue_head_t core_initialized_waitq; +}; + +/* nvscic2c-pcie epc specific context. */ +struct epc_context_t { + struct completion epf_ready_cmpl; + struct completion epf_shutdown_cmpl; +}; + +/* + * nvscic2c-pcie module context. + * Contains all the information for all + * - Configuration parameters per device-tree. + */ +struct driver_ctx_t { + /* driver mode as parsed from compatible string in device-tree.*/ + enum drv_mode_t drv_mode; + char *drv_name; + + /* the configuration for module and it's endpoints.*/ + struct driver_param_t drv_param; + + /* + * Visible region to peer SoC for PCIe writes. In nvscic2c-pcie + * use-cases, it is backed by physical memory allocated for + * comm-channel, endpoints and stream-objs(mem and sync), etc. + * + * Peer's write lands here to be read by local/self. + */ + struct dma_buff_t self_mem; + + /* + * Point to peer's visible region for data-writes. This is a PCIe + * aperture which allows local/self to write into peer's memory. 
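+ *
+ * (For a given offset into the shared window, both SoCs carve their
+ * windows identically - see pci-client.c - so writes to
+ * peer_mem.aper + offset on one side land at the same offset in the
+ * other side's self_mem.)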
+ */ + struct pci_aper_t peer_mem; + + /* pci-client abstraction handle.*/ + void *pci_client_h; + + /* comm-channel abstraction. */ + void *comm_channel_h; + + /* vmap abstraction, this can be moved within endpoints.*/ + void *vmap_h; + + /* tegra-pcie-edma module handle.*/ + void *edma_h; + + /* endpoint absraction handle.*/ + void *endpoints_h; + + /* DRV_MODE specific.*/ + struct epf_context_t *epf_ctx; + struct epc_context_t *epc_ctx; + + /* peer cpu */ + enum peer_cpu_t peer_cpu; + + /* IOVA alloc abstraction.*/ + struct iova_alloc_domain_t *ivd_h; +}; + +/* + * Look-up device tree node for the compatible string. Check for the + * pci-dev-id within the compatible node, if more than one such node found also + * return error. + */ +int +dt_parse(u32 pci_dev_id, enum drv_mode_t drv_mode, + struct driver_param_t *drv_param); + +/* Release any memory allocated during dt_parse().*/ +int +dt_release(struct driver_param_t *drv_param); +#endif //__MODULE_H__ diff --git a/drivers/misc/nvscic2c-pcie/pci-client.c b/drivers/misc/nvscic2c-pcie/pci-client.c new file mode 100644 index 00000000..bcb24cfe --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/pci-client.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#define pr_fmt(fmt) "nvscic2c-pcie: pci-client: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "common.h" +#include "iova-mngr.h" +#include "pci-client.h" + +/* Anticipate as many users as endpoints in worst-case. */ +#define MAX_LINK_EVENT_USERS (MAX_ENDPOINTS) + +/* + * The size of x86/peer edma rx descriptor + */ +#define EDMA_CH_DESC_SZ (60 * SZ_1K) + +/* Internal private data-structure as PCI client. */ +struct pci_client_t { + struct device *dev; + struct iommu_domain *domain; + + /* Recv area. Peer's write reflect here. */ + struct dma_buff_t *self_mem; + + /* Send area. PCIe aperture area. Self's Write reach Peer via this.*/ + struct pci_aper_t *peer_mem; + + /* PCI link status memory. mmap() to user-space.*/ + atomic_t link_status; + struct cpu_buff_t link_status_mem; + + /* + * Lock to guard users getting un/registered and link status change + * invocation at the same time. Also, to protect table from concurrent + * access. + */ + struct mutex event_tbl_lock; + + /* Table of users registered for change in PCI link status. */ + struct event_t { + /* is taken.*/ + atomic_t in_use; + + /* callback to invoke when change in status is seen.*/ + struct callback_ops cb_ops; + } event_tbl[MAX_LINK_EVENT_USERS]; + + /* + * Skip reserved iova for use. This area in BAR0 aperture is reserved for + * GIC SPI interrupt mechanism. As the allocation, fragmentration + * of iova must be identical on both @DRV_MODE_EPF and @DRV_MODE_EPC + * skip this area for use in @DRV_MODE_EPC also. We skip by reserving + * the iova region and thereby marking it as unusable. 
+ */ + dma_addr_t edma_ch_desc_iova; + bool edma_ch_desc_iova_mapped; + void *edma_ch_desc_pva; + void *skip_iova; + void *skip_meta; + void *edma_ch_desc_iova_h; + /* + * iova-mngr instance for managing the reserved iova region. + * application allocated objs and endpoints allocated physical memory + * are pinned to this address. + */ + void *mem_mngr_h; + + /* + * the context of DRV_MODE_EPC/DRV_MODE_EPF + */ + struct driver_ctx_t *drv_ctx; + +}; + +static void +free_link_status_mem(struct pci_client_t *ctx) +{ + if (!ctx || !ctx->link_status_mem.pva) + return; + + kfree(ctx->link_status_mem.pva); + ctx->link_status_mem.pva = NULL; +} + +static int +allocate_link_status_mem(struct pci_client_t *ctx) +{ + int ret = 0; + struct cpu_buff_t *mem = &ctx->link_status_mem; + + mem->size = PAGE_ALIGN(sizeof(enum nvscic2c_pcie_link)); + mem->pva = kzalloc(mem->size, GFP_KERNEL); + if (WARN_ON(!mem->pva)) + return -ENOMEM; + + atomic_set(&ctx->link_status, NVSCIC2C_PCIE_LINK_DOWN); + *((enum nvscic2c_pcie_link *)mem->pva) = NVSCIC2C_PCIE_LINK_DOWN; + + /* physical address to be mmap() in user-space.*/ + mem->phys_addr = virt_to_phys(mem->pva); + + return ret; +} + +/* Free edma rx desc_iova that originally mapped from bar0 */ +static void +free_edma_rx_desc_iova(struct pci_client_t *ctx) +{ + if (!ctx) + return; + + if (ctx->skip_iova) { + iova_mngr_block_release(ctx->mem_mngr_h, &ctx->skip_iova); + ctx->skip_iova = NULL; + } + + if (ctx->edma_ch_desc_iova_mapped) { + iommu_unmap(ctx->domain, ctx->edma_ch_desc_iova, EDMA_CH_DESC_SZ); + ctx->edma_ch_desc_iova_mapped = false; + } + + if (ctx->edma_ch_desc_iova) { + iova_mngr_block_release(ctx->mem_mngr_h, + &ctx->edma_ch_desc_iova_h); + ctx->edma_ch_desc_iova_h = NULL; + } + + if (ctx->edma_ch_desc_pva) { + free_pages_exact(ctx->edma_ch_desc_pva, EDMA_CH_DESC_SZ); + ctx->edma_ch_desc_pva = NULL; + } + + if (ctx->skip_meta) { + iova_mngr_block_release(ctx->mem_mngr_h, &ctx->skip_meta); + ctx->skip_meta = NULL; + } +} + +/* Allocate desc_iova and mapping to bar0 for remote edma, x86-orin c2c only */ +static int +allocate_edma_rx_desc_iova(struct pci_client_t *ctx) +{ + int ret = 0; + int prot = 0; + u64 phys_addr = 0; + + if (WARN_ON(!ctx)) + return -EINVAL; + + /* + *bar0 first 128K [-------128k-------] + * [-4k-][-60k-][-64k-] + *first 4K reserved for meta data communication + *next 60k for x86/peer edma rx descriptor + *next 64K resered for sys-sw + */ + ret = iova_mngr_block_reserve(ctx->mem_mngr_h, SZ_4K, + NULL, NULL, &ctx->skip_meta); + if (ret) { + pr_err("Failed to skip the 4K reserved iova region\n"); + goto err; + } + ctx->edma_ch_desc_pva = alloc_pages_exact(EDMA_CH_DESC_SZ, (GFP_KERNEL | __GFP_ZERO)); + if (!ctx->edma_ch_desc_pva) { + pr_err("Failed to allocate a page with size of 60K\n"); + ret = -ENOMEM; + goto err; + } + phys_addr = page_to_phys(virt_to_page(ctx->edma_ch_desc_pva)); + if (!phys_addr) { + pr_err("Failed to retrieve physical address\n"); + ret = -ENOMEM; + goto err; + } + ret = iova_mngr_block_reserve(ctx->mem_mngr_h, EDMA_CH_DESC_SZ, + &ctx->edma_ch_desc_iova, NULL, + &ctx->edma_ch_desc_iova_h); + if (ret) { + pr_err("Failed to reserve 60K iova space for remote edma desc\n"); + goto err; + } + prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(ctx->domain, ctx->edma_ch_desc_iova, phys_addr, EDMA_CH_DESC_SZ, prot); + if (ret) { + pr_err("pci client failed to map iova to 60K physical backing\n"); + goto err; + } + ctx->edma_ch_desc_iova_mapped = true; + + /* bar0+64K - bar0+126K reserved for sys-sw */ + 
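+ /*
+  * Note: this trailing 64K range is only reserved (no physical memory is
+  * mapped behind it) so that the BAR0 iova layout stays identical on both
+  * sides; free_edma_rx_desc_iova() undoes the reservations and the mapping
+  * above in reverse order.
+  */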
ret = iova_mngr_block_reserve(ctx->mem_mngr_h, SZ_64K, + NULL, NULL, &ctx->skip_iova); + if (ret) { + pr_err("Failed to skip the 64K reserved iova region\n"); + goto err; + } + return ret; + +err: + free_edma_rx_desc_iova(ctx); + return ret; +} + +int +pci_client_init(struct pci_client_params *params, void **pci_client_h) +{ + u32 i = 0; + int ret = 0; + struct pci_client_t *ctx = NULL; + + /* should not be an already instantiated pci client context. */ + if (WARN_ON(!pci_client_h || *pci_client_h || + !params || !params->self_mem || !params->peer_mem || + !params->dev)) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return -ENOMEM; + ctx->dev = params->dev; + ctx->self_mem = params->self_mem; + ctx->peer_mem = params->peer_mem; + mutex_init(&ctx->event_tbl_lock); + + /* for link event notifications. */ + for (i = 0; i < MAX_LINK_EVENT_USERS; i++) + atomic_set(&ctx->event_tbl[i].in_use, 0); + + ret = allocate_link_status_mem(ctx); + if (ret) + goto err; + + /* + * for mapping application objs and endpoint physical memory to remote + * visible area. + */ + ctx->domain = iommu_get_domain_for_dev(ctx->dev); + if (WARN_ON(!ctx->domain)) { + ret = -ENODEV; + pr_err("iommu not available for the pci device\n"); + goto err; + } + + /* assumption : PCIe to be fully IO Coherent. Validate the assumption.*/ + if (WARN_ON(!dev_is_dma_coherent(ctx->dev))) { + ret = -ENODEV; + pr_err("io-coherency not enabled for the pci device\n"); + goto err; + } + + /* + * configure iova manager for inbound/self_mem. Application + * supplied objs shall be pinned to this area. + */ + ret = iova_mngr_init("self_mem", + ctx->self_mem->dma_handle, ctx->self_mem->size, + &ctx->mem_mngr_h); + if (ret) { + pr_err("Failed to initialize iova memory manager\n"); + goto err; + } + + /* + * Skip reserved iova for any use. This area in BAR0 is reserved for + * GIC SPI interrupt mechanism. As the allocation, fragmentration + * of iova must be identical on both @DRV_MODE_EPF and @DRV_MODE_EPC + * skip this area for use in @DRV_MODE_EPC also. We skip by reserving + * the iova region and thereby marking it as unusable for others. 
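+ *
+ * The remainder of self_mem is then handed out to users through the
+ * wrappers below, roughly (sketch; "page" and "size" are illustrative,
+ * error handling omitted):
+ *
+ *   u64 iova; size_t offset; void *blk = NULL;
+ *
+ *   pci_client_alloc_iova(h, size, &iova, &offset, &blk);
+ *   pci_client_map_addr(h, iova, page_to_phys(page), size,
+ *                       IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE);
+ *   ...
+ *   pci_client_unmap_addr(h, iova, size);
+ *   pci_client_free_iova(h, &blk);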
+ */ + /* remote edma on x86 */ + ret = allocate_edma_rx_desc_iova(ctx); + if (ret) { + pr_err("Failed to skip the reserved iova region\n"); + goto err; + } + + *pci_client_h = ctx; + return ret; + +err: + pci_client_deinit((void **)&ctx); + return ret; +} + +void +pci_client_deinit(void **pci_client_h) +{ + struct pci_client_t *ctx = (struct pci_client_t *)(*pci_client_h); + + if (!ctx) + return; + + free_edma_rx_desc_iova(ctx); + + if (ctx->mem_mngr_h) { + iova_mngr_deinit(&ctx->mem_mngr_h); + ctx->mem_mngr_h = NULL; + } + + free_link_status_mem(ctx); + mutex_destroy(&ctx->event_tbl_lock); + kfree(ctx); + + *pci_client_h = NULL; +} + +int +pci_client_alloc_iova(void *pci_client_h, size_t size, u64 *iova, + size_t *offset, void **block_h) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + return iova_mngr_block_reserve(ctx->mem_mngr_h, size, + iova, offset, block_h); +} + +int +pci_client_free_iova(void *pci_client_h, void **block_h) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (!ctx || !block_h) + return -EINVAL; + + return iova_mngr_block_release(ctx->mem_mngr_h, block_h); +} + +int +pci_client_map_addr(void *pci_client_h, u64 to_iova, phys_addr_t paddr, + size_t size, int prot) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx || !to_iova || !paddr || !size)) + return -EINVAL; + + return iommu_map(ctx->domain, to_iova, paddr, size, prot); +} + +size_t +pci_client_unmap_addr(void *pci_client_h, u64 from_iova, size_t size) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (!ctx || !ctx->domain) + return 0; + + return iommu_unmap(ctx->domain, from_iova, size); +} + +int +pci_client_get_peer_aper(void *pci_client_h, size_t offsetof, size_t size, + phys_addr_t *phys_addr) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx || !size || !phys_addr)) + return -EINVAL; + + if (ctx->peer_mem->size < (offsetof + size)) + return -ENOMEM; + + *phys_addr = ctx->peer_mem->aper + offsetof; + + return 0; +} + +struct dma_buf_attachment * +pci_client_dmabuf_attach(void *pci_client_h, struct dma_buf *dmabuff) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx || !dmabuff)) + return ERR_PTR(-EINVAL); + + return dma_buf_attach(dmabuff, ctx->dev); +} + +void +pci_client_dmabuf_detach(void *pci_client_h, struct dma_buf *dmabuff, + struct dma_buf_attachment *attach) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (!ctx || !dmabuff || !attach) + return; + + return dma_buf_detach(dmabuff, attach); +} + +/* Helper function to mmap the PCI link status memory to user-space.*/ +int +pci_client_mmap_link_mem(void *pci_client_h, struct vm_area_struct *vma) +{ + int ret = 0; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!vma)) + return -EINVAL; + + if (WARN_ON(!ctx || !ctx->link_status_mem.pva)) + return -EINVAL; + + if ((vma->vm_end - vma->vm_start) != ctx->link_status_mem.size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = remap_pfn_range(vma, + vma->vm_start, + PFN_DOWN(ctx->link_status_mem.phys_addr), + ctx->link_status_mem.size, + vma->vm_page_prot); + if (ret) + pr_err("remap_pfn_range returns error: (%d) for Link mem\n", ret); + + return ret; +} + +/* Query PCI link status. 
*/ +enum nvscic2c_pcie_link +pci_client_query_link_status(void *pci_client_h) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return NVSCIC2C_PCIE_LINK_DOWN; + + return atomic_read(&ctx->link_status); +} + +/* + * Users/Units can register for PCI link events as received by + * @@DRV_MODE_EPF or @DRV_MODE_EPC module sbstraction. + */ +int +pci_client_register_for_link_event(void *pci_client_h, + struct callback_ops *ops, u32 *id) +{ + u32 i = 0; + int ret = 0; + struct event_t *event = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + if (WARN_ON(!id || !ops || !ops->callback)) + return -EINVAL; + + mutex_lock(&ctx->event_tbl_lock); + for (i = 0; i < MAX_LINK_EVENT_USERS; i++) { + event = &ctx->event_tbl[i]; + if (!atomic_read(&event->in_use)) { + event->cb_ops.callback = ops->callback; + event->cb_ops.ctx = ops->ctx; + atomic_set(&event->in_use, 1); + *id = i; + break; + } + } + if (i == MAX_LINK_EVENT_USERS) { + ret = -ENOMEM; + pr_err("PCI link event registration full\n"); + } + mutex_unlock(&ctx->event_tbl_lock); + + return ret; +} + +/* Unregister for PCI link events. - teardown only. */ +int +pci_client_unregister_for_link_event(void *pci_client_h, u32 id) +{ + int ret = 0; + struct event_t *event = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + if (WARN_ON(id >= MAX_LINK_EVENT_USERS)) + return -EINVAL; + + mutex_lock(&ctx->event_tbl_lock); + event = &ctx->event_tbl[id]; + if (atomic_read(&event->in_use)) { + atomic_set(&event->in_use, 0); + event->cb_ops.callback = NULL; + event->cb_ops.ctx = NULL; + } + mutex_unlock(&ctx->event_tbl_lock); + + return ret; +} + +/* + * Update the PCI link status as received in @DRV_MODE_EPF or @DRV_MODE_EPC + * module abstraction. Propagate the link status to registered users. + */ +int +pci_client_change_link_status(void *pci_client_h, + enum nvscic2c_pcie_link status) +{ + u32 i = 0; + int ret = 0; + struct page *page = NULL; + struct event_t *event = NULL; + struct callback_ops *ops = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + if (WARN_ON(status < NVSCIC2C_PCIE_LINK_DOWN || + status > NVSCIC2C_PCIE_LINK_UP)) + return -EINVAL; + + /* + * Reflect the status for user-space to see. + * For consistent view of same phys_addr by user-space, flush the update + * Call is arm64 specific. + */ + atomic_set(&ctx->link_status, status); + *((enum nvscic2c_pcie_link *)ctx->link_status_mem.pva) = status; + page = virt_to_page(ctx->link_status_mem.pva); + flush_dcache_page(page); + + /* interrupt registered users. 
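+ * Note: callbacks are invoked synchronously here with event_tbl_lock
+ * held, so they must not block and must not call back into
+ * pci_client_register/unregister_for_link_event() (that would deadlock
+ * on the same mutex).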
*/ + mutex_lock(&ctx->event_tbl_lock); + for (i = 0; i < MAX_LINK_EVENT_USERS; i++) { + event = &ctx->event_tbl[i]; + if (atomic_read(&event->in_use)) { + ops = &event->cb_ops; + ops->callback(NULL, ops->ctx); + } + } + mutex_unlock(&ctx->event_tbl_lock); + + return ret; +} + +/* + * Helper functions to set and get driver context from pci_client t + * + */ +/*Set driver context of DRV_MODE_EPF or DRV_MODE_EPC */ +int +pci_client_save_driver_ctx(void *pci_client_h, struct driver_ctx_t *drv_ctx) +{ + int ret = 0; + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!pci_client_ctx)) + return -EINVAL; + if (WARN_ON(!drv_ctx)) + return -EINVAL; + pci_client_ctx->drv_ctx = drv_ctx; + return ret; +} + +/*Get the driver context of DRV_MODE_EPF or DRV_MODE_EPC */ +struct driver_ctx_t * +pci_client_get_driver_ctx(void *pci_client_h) +{ + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + struct driver_ctx_t *drv_ctx = NULL; + + if (WARN_ON(!pci_client_ctx)) + return NULL; + drv_ctx = pci_client_ctx->drv_ctx; + if (WARN_ON(!drv_ctx)) + return NULL; + return drv_ctx; +} + +/* get drv mode */ +enum drv_mode_t +pci_client_get_drv_mode(void *pci_client_h) +{ + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + struct driver_ctx_t *drv_ctx = NULL; + + if (WARN_ON(!pci_client_ctx)) + return DRV_MODE_INVALID; + drv_ctx = pci_client_ctx->drv_ctx; + if (WARN_ON(!drv_ctx)) + return NVCPU_MAXIMUM; + return drv_ctx->drv_mode; +} + +/* save the peer cup orin/x86_64 */ +int +pci_client_save_peer_cpu(void *pci_client_h, enum peer_cpu_t peer_cpu) +{ + int ret = 0; + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + struct driver_ctx_t *drv_ctx = NULL; + + if (WARN_ON(!pci_client_ctx)) + return -EINVAL; + drv_ctx = pci_client_ctx->drv_ctx; + if (WARN_ON(!drv_ctx)) + return -EINVAL; + drv_ctx->peer_cpu = peer_cpu; + return ret; +} + +/* get the peer cup orin/x86_64 */ +enum peer_cpu_t +pci_client_get_peer_cpu(void *pci_client_h) +{ + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + struct driver_ctx_t *drv_ctx = NULL; + + if (WARN_ON(!pci_client_ctx)) + return NVCPU_MAXIMUM; + drv_ctx = pci_client_ctx->drv_ctx; + if (WARN_ON(!drv_ctx)) + return NVCPU_MAXIMUM; + return drv_ctx->peer_cpu; +} + +/* get the iova allocated for x86 peer tegra-pcie-emda rx descriptor */ +dma_addr_t +pci_client_get_edma_rx_desc_iova(void *pci_client_h) +{ + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (ctx) + return ctx->edma_ch_desc_iova; + else + return 0; +} + +int +pci_client_raise_irq(void *pci_client_h, enum pci_epc_irq_type type, u16 num) +{ + int ret = 0; + struct pci_client_t *pci_client_ctx = (struct pci_client_t *)pci_client_h; + struct driver_ctx_t *drv_ctx = NULL; + struct epf_context_t *epf_ctx = NULL; + + if (WARN_ON(!pci_client_ctx)) + return -EINVAL; + + drv_ctx = pci_client_ctx->drv_ctx; + if (WARN_ON(!drv_ctx)) + return -EINVAL; + + epf_ctx = drv_ctx->epf_ctx; + if (WARN_ON(!epf_ctx)) + return -EINVAL; + if (WARN_ON(!epf_ctx->epf)) + return -EINVAL; + if (WARN_ON(drv_ctx->drv_mode != DRV_MODE_EPF)) + return -EINVAL; + + ret = pci_epc_raise_irq(epf_ctx->epf->epc, epf_ctx->epf->func_no, + PCIE_VFNO, type, num); + return ret; +} + diff --git a/drivers/misc/nvscic2c-pcie/pci-client.h b/drivers/misc/nvscic2c-pcie/pci-client.h new file mode 100644 index 00000000..95a75136 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/pci-client.h @@ -0,0 +1,136 @@ +/* 
SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __PCI_CLIENT_H__ +#define __PCI_CLIENT_H__ + +#include + +#include "common.h" +#include "module.h" + +/* forward declaration.*/ +struct vm_area_struct; +struct dma_buf; +struct dma_buf_attachment; + +/* + * PCI client initialization parameters. The fields must remain persistent + * till deinitialization (exit). + */ +struct pci_client_params { + struct dma_buff_t *self_mem; + struct pci_aper_t *peer_mem; + /* + * @DRV_MODE_EPC: &pci_dev->dev + * @DRV_MODE_EPF: epf->epc->dev.parent. + */ + struct device *dev; +}; + +/* Initialize PCI client either for @DRV_MODE_EPF or @DRV_MODE_EPC. */ +int +pci_client_init(struct pci_client_params *params, void **pci_client_h); + +/* Teardown of PCI client. */ +void +pci_client_deinit(void **pci_client_h); + +/* reserve iova using the iova-manager.*/ +int +pci_client_alloc_iova(void *pci_client_h, size_t size, u64 *address, + size_t *offset, void **block_h); + +/* free the reserved iova.*/ +int +pci_client_free_iova(void *pci_client_h, void **block_h); + +int +pci_client_map_addr(void *pci_client_h, u64 to_iova, phys_addr_t paddr, + size_t size, int prot); + +size_t +pci_client_unmap_addr(void *pci_client_h, u64 from_iova, size_t size); + +/* get the pci aperture for a given offset.*/ +int +pci_client_get_peer_aper(void *pci_client_h, size_t offsetof, size_t size, + phys_addr_t *phys_addr); + +/* attach dma-buf to pci device.*/ +struct dma_buf_attachment * +pci_client_dmabuf_attach(void *pci_client_h, struct dma_buf *dmabuff); + +/* detach dma-buf to pci device.*/ +void +pci_client_dmabuf_detach(void *pci_client_h, struct dma_buf *dmabuff, + struct dma_buf_attachment *attach); +/* + * Users/Units can register for PCI link events as received by + * @DRV_MODE_EPF or @DRV_MODE_EPC module sbstraction. + */ +int +pci_client_register_for_link_event(void *pci_client_h, + struct callback_ops *ops, u32 *id); + +/* Unregister for PCI link events. - teardown only. */ +int +pci_client_unregister_for_link_event(void *pci_client_h, u32 id); + +/* + * Update the PCI link status as received in @DRV_MODE_EPF or @DRV_MODE_EPC + * module abstraction. Propagate the link status to registered users. + */ +int +pci_client_change_link_status(void *pci_client_h, + enum nvscic2c_pcie_link status); + +/* Helper function to mmap the PCI link status memory to user-space.*/ +int +pci_client_mmap_link_mem(void *pci_client_h, struct vm_area_struct *vma); + +/* Query PCI link status. 
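+ *
+ * Typical use together with the link-event registration above (sketch;
+ * "on_link_event" and "ep_ctx" are illustrative names, error handling
+ * omitted):
+ *
+ *   static void on_link_event(void *data, void *ctx) { ... }
+ *
+ *   struct callback_ops ops = { .callback = on_link_event, .ctx = ep_ctx };
+ *   u32 id = 0;
+ *
+ *   pci_client_register_for_link_event(pci_client_h, &ops, &id);
+ *   if (pci_client_query_link_status(pci_client_h) == NVSCIC2C_PCIE_LINK_UP)
+ *           ... start using the channel ...
+ *   pci_client_unregister_for_link_event(pci_client_h, id);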
*/ +enum nvscic2c_pcie_link +pci_client_query_link_status(void *pci_client_h); + +/* + * Helper functions to set and set driver context from pci_client_h + */ + +/* Save driver context of DRV_MODE_EPF or DRV_MODE_EPC */ +int +pci_client_save_driver_ctx(void *pci_client_h, struct driver_ctx_t *drv_ctx); + +/* Getter driver context of DRV_MODE_EPF or DRV_MODE_EPC */ +struct driver_ctx_t * +pci_client_get_driver_ctx(void *pci_client_h); +/*getter drv mode */ +enum drv_mode_t +pci_client_get_drv_mode(void *pci_client_h); +/* save peer cpu sent by boottrap msg */ +int +pci_client_save_peer_cpu(void *pci_client_h, enum peer_cpu_t peer_cpu); + +/* Getter the soc/arch of rp */ +enum peer_cpu_t +pci_client_get_peer_cpu(void *pci_client_h); + +/* Get allocated edma rx desc iova */ +dma_addr_t +pci_client_get_edma_rx_desc_iova(void *pci_client_h); + +/* pci client raise irq to rp */ +int +pci_client_raise_irq(void *pci_client_h, enum pci_epc_irq_type type, u16 num); +#endif // __PCI_CLIENT_H__ diff --git a/drivers/misc/nvscic2c-pcie/stream-extensions.c b/drivers/misc/nvscic2c-pcie/stream-extensions.c new file mode 100644 index 00000000..5ca23721 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/stream-extensions.c @@ -0,0 +1,1511 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: stream-ext: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "comm-channel.h" +#include "common.h" +#include "descriptor.h" +#include "module.h" +#include "pci-client.h" +#include "stream-extensions.h" +#include "vmap.h" + +/* forward declaration.*/ +struct stream_ext_ctx_t; +struct stream_ext_obj; + +/* limits as set for copy requests.*/ +struct copy_req_limits { + u64 max_copy_requests; + u64 max_flush_ranges; + u64 max_post_fences; +}; + +/* + * Copied submit-copy args from use-space. These are then parsed and validated. + * This copy is required as args have pointer to user-space area which be copied + * into kernel-space before using it. On subsequent copy, basic checks are done + * and shall be used to create a copy request payload for eDMA. + */ +struct copy_req_params { + u64 num_local_post_fences; + s32 *local_post_fences; + u64 num_remote_post_fences; + s32 *remote_post_fences; + u64 num_flush_ranges; + u64 *remote_post_fence_values; + struct nvscic2c_pcie_flush_range *flush_ranges; +}; + +/* one copy request.*/ +struct copy_request { + /* book-keeping for copy completion.*/ + struct list_head node; + + /* + * back-reference to stream_ext_context, used in eDMA callback. + * to add this copy_request back in free_list for reuse. Also, + * the host1x_pdev in ctx is used via this ctx in the callback. + */ + struct stream_ext_ctx_t *ctx; + + /* + * actual number of handles per the submit-copy request. + * Shall include ((2 * num_flush_range) + num_local_post_fences + * + num_remote_post_fences). + * used for refcounting: out of order free and copy. 
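+ * (The intent - see cache_copy_request_handles() and
+ *  release_copy_request_handles() - is that every handle cached here
+ *  holds a reference on its stream_ext_obj, so an object freed by the
+ *  application while a copy is in flight is only marked_for_del and is
+ *  actually released once the copy completes.)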
+ */ + u64 num_handles; + /* + * space for num_handles considering worst-case allocation: + * ((2 * max_flush_ranges) + (max_post_fences)). + */ + struct stream_ext_obj **handles; + + /* + * actual number of edma-desc per the submit-copy request. + * Shall include (num_flush_range + num_remote_post_fences (eDMAed)) + */ + u64 num_edma_desc; + /* + * space for num_edma_desc considering worst-case allocation: + * (max_flush_ranges + max_post_fences), assuming submit-copy could have + * all the post-fences for remote signalling by eDMA. + */ + struct tegra_pcie_edma_desc *edma_desc; + + /* + * actual number of local_post-fences per the submit-copy request. + * Shall include (num_local_post_fences). + */ + u64 num_local_post_fences; + u64 num_remote_post_fences; + u64 num_remote_buf_objs; + /* + * space for num_local_post_fences considering worst-case allocation: + * max_post_fences, assuming submit-copy could have all post-fences for + * local signalling. + */ + struct stream_ext_obj **local_post_fences; + /* + * space for num_remote_post_fences considering worst-case allocation: + * max_post_fences, assuming submit-copy could have all post-fences for + * remote signalling. + */ + struct stream_ext_obj **remote_post_fences; + /* + * space for num_remote_buf_objs considering worst-case allocation: + * max_flush_ranges, assuming submit-copy could have all flush ranges for + * transfers. + */ + struct stream_ext_obj **remote_buf_objs; + + /* X86 uses semaphores for fences and it needs to be written with NvSciStream + * provided value + */ + u64 *remote_post_fence_values; + enum peer_cpu_t peer_cpu; +}; + +struct stream_ext_obj { + /* book-keeping for cleanup.*/ + struct list_head node; + s32 handle; + + /* back-reference to vmap handle, required during free/unmap.*/ + void *vmap_h; + + /* for correctness check. */ + enum nvscic2c_pcie_obj_type type; + u32 soc_id; + u32 cntrlr_id; + u32 ep_id; + + /* for ordering out of order copy and free ops.*/ + bool marked_for_del; + struct kref refcount; + + /* virtual mapping information.*/ + struct vmap_obj_attributes vmap; + + /* + * ImportObj only. + * Add offsetof from peer window to local aper base for access + * by local eDMA or CPU(mmap) towards peer obj. + * - For PCIe RP. + * Add offsetof from peer window to local aper base for access by + * CPU(mmap) towards peer obj, eDMA will use the iova direactly. + * - For PCIe EP. + */ + u32 import_type; + phys_addr_t aper; + + /* Mapping for ImportObj for CPU Read/Write.*/ + void __iomem *import_obj_map; +}; + +/* stream extensions context per NvSciC2cPcie endpoint.*/ +struct stream_ext_ctx_t { + /* + * mode - EPC(on PCIe RP) or EPF(on PCIe EP). + * Destination address of eDMA descriptor is different for these + * two modes. + */ + enum drv_mode_t drv_mode; + + u32 ep_id; + char ep_name[NAME_MAX]; + + struct node_info_t local_node; + struct node_info_t peer_node; + + /* for local post fence increment ops.*/ + struct platform_device *host1x_pdev; + struct host1x *host1x; + + /* vmap abstraction.*/ + void *vmap_h; + + /* tegra-pcie-edma cookie.*/ + void *edma_h; + + /* comm-channel abstraction. */ + void *comm_channel_h; + + /* pci client abstraction. 
*/ + void *pci_client_h; + + /* max copy request limits as set by user.*/ + struct copy_req_limits cr_limits; + + /* Intermediate validated and copied user-args for submit-copy ioctl.*/ + struct copy_req_params cr_params; + + /* Async copy: book-keeping copy-requests: free and in-progress.*/ + struct list_head free_list; + /* guard free_list.*/ + struct mutex free_lock; + atomic_t transfer_count; + wait_queue_head_t transfer_waitq; + + /* allocated stream obj list for book-keeping.*/ + struct list_head obj_list; +}; + +static int +cache_copy_request_handles(struct copy_req_params *params, + struct copy_request *cr); +static int +release_copy_request_handles(struct copy_request *cr); + +static void +signal_local_post_fences(struct copy_request *cr); + +static void +signal_remote_post_fences(struct copy_request *cr); + +static int +prepare_edma_desc(enum drv_mode_t drv_mode, struct copy_req_params *params, + struct tegra_pcie_edma_desc *desc, u64 *num_desc, enum peer_cpu_t); + +static edma_xfer_status_t +schedule_edma_xfer(void *edma_h, void *priv, u64 num_desc, + struct tegra_pcie_edma_desc *desc); +static void +callback_edma_xfer(void *priv, edma_xfer_status_t status, + struct tegra_pcie_edma_desc *desc); +static int +validate_handle(struct stream_ext_ctx_t *ctx, s32 handle, + enum nvscic2c_pcie_obj_type type); +static int +allocate_handle(struct stream_ext_ctx_t *ctx, + enum nvscic2c_pcie_obj_type type, + void *ioctl_args); +static int +copy_args_from_user(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_submit_copy_args *args, + struct copy_req_params *params); +static int +allocate_copy_request(struct stream_ext_ctx_t *ctx, + struct copy_request **copy_request); +static void +free_copy_request(struct copy_request **copy_request); + +static int +allocate_copy_req_params(struct stream_ext_ctx_t *ctx, + struct copy_req_params *params); +static void +free_copy_req_params(struct copy_req_params *params); + +static int +validate_copy_req_params(struct stream_ext_ctx_t *ctx, + struct copy_req_params *params); + +static int +fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + int ret = 0; + u64 memaddr = 0x0; + u64 memsize = 0x0; + struct stream_ext_obj *stream_obj = NULL; + + if (WARN_ON(!filep)) + return -EFAULT; + + if (WARN_ON(!filep->private_data)) + return -EFAULT; + + if (WARN_ON(!(vma))) + return -EFAULT; + + /* read access of import sync object would mean poll over pcie.*/ + if (WARN_ON(vma->vm_flags & VM_READ)) + return -EINVAL; + + stream_obj = (struct stream_ext_obj *)(filep->private_data); + if (WARN_ON(stream_obj->type != NVSCIC2C_PCIE_OBJ_TYPE_IMPORT)) + return -EOPNOTSUPP; + if (WARN_ON(stream_obj->import_type != STREAM_OBJ_TYPE_SYNC)) + return -EOPNOTSUPP; + if (WARN_ON(stream_obj->marked_for_del)) + return -EINVAL; + + memsize = stream_obj->vmap.size; + memaddr = stream_obj->aper; + + vma->vm_pgoff = 0; + vma->vm_flags |= (VM_DONTCOPY); + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + ret = remap_pfn_range(vma, vma->vm_start, PFN_DOWN(memaddr), + memsize, vma->vm_page_prot); + if (ret) + pr_err("mmap() failed for Imported sync object\n"); + + return ret; +} + +static void +streamobj_free(struct kref *kref) +{ + struct stream_ext_obj *stream_obj = NULL; + + if (!kref) + return; + + stream_obj = container_of(kref, struct stream_ext_obj, refcount); + if (stream_obj) { + if (stream_obj->import_obj_map) + iounmap(stream_obj->import_obj_map); + vmap_obj_unmap(stream_obj->vmap_h, stream_obj->vmap.type, + stream_obj->vmap.id); + kfree(stream_obj); + } +} + 
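+/*
+ * .release of the anon-inode fds returned by allocate_handle(); runs on the
+ * final close of a stream-obj handle (application exit, or close_fd() from
+ * ioctl_free_obj()). Rough refcount lifecycle, for illustration only:
+ *
+ *   allocate_handle()                 kref_init()  -> 1
+ *   cache_copy_request_handles()      kref_get()   (per in-flight copy)
+ *   fops_release()/ioctl_free_obj()   marked_for_del = true, kref_put()
+ *   callback_edma_xfer()              kref_put()   -> streamobj_free() at 0
+ */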
+static int +fops_release(struct inode *inode, struct file *filep) +{ + struct stream_ext_obj *stream_obj = + (struct stream_ext_obj *)(filep->private_data); + if (!stream_obj) + return 0; + /* + * actual free happens when the refcount reaches zero. This is done to + * accommodate: out of order free while copy isin progress use-case. + */ + list_del(&stream_obj->node); + stream_obj->marked_for_del = true; + kref_put(&stream_obj->refcount, streamobj_free); + + filep->private_data = NULL; + return 0; +} + +/* for all stream objs - Local, remote + Mem, Sync, Import*/ +static const struct file_operations fops_default = { + .owner = THIS_MODULE, + .release = fops_release, + .mmap = fops_mmap, +}; + +/* implement NVSCIC2C_PCIE_IOCTL_FREE ioctl call. */ +static int +ioctl_free_obj(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_free_obj_args *args) +{ + int ret = 0; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + + /* validate the input handle for correctness.*/ + ret = validate_handle(ctx, args->handle, args->obj_type); + if (ret) + return ret; + + filep = fget(args->handle); + stream_obj = filep->private_data; + filep->private_data = NULL; + fput(filep); + + if (stream_obj) { + list_del(&stream_obj->node); + stream_obj->marked_for_del = true; + kref_put(&stream_obj->refcount, streamobj_free); + } + + /* this shall close the handle: resulting in fops_release().*/ + close_fd(args->handle); + + return 0; +} + +/* implement NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN call. */ +static int +ioctl_export_obj(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_export_obj_args *args) +{ + int ret = 0; + u64 exp_desc = 0; + struct comm_msg msg = {0}; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + struct node_info_t *peer = &ctx->peer_node; + enum vmap_obj_type export_type = STREAM_OBJ_TYPE_MEM; + + /* validate the input handle for correctness.*/ + ret = validate_handle(ctx, args->in.handle, args->obj_type); + if (ret) + return ret; + + /* only target/remote can be exported.*/ + if (args->obj_type == NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM) + export_type = STREAM_OBJ_TYPE_MEM; + else if (args->obj_type == NVSCIC2C_PCIE_OBJ_TYPE_REMOTE_SYNC) + export_type = STREAM_OBJ_TYPE_SYNC; + else + return -EINVAL; + + filep = fget(args->in.handle); + if (!filep) { + pr_err("filep is NULL\n"); + return -EINVAL; + } + stream_obj = filep->private_data; + + /* + * take a reference to the virtual mapping. The reference shall be + * released by peer when the peer unregisters it's corresponding + * imported obj. This happens via comm-channel. + * + * reference count of stream_obj is not taken, valid scenario to + * free the exported obj from this SoC but the virtual mapping + * to continue exist and is released when peer SoC releases it's + * corresponding import stream obj. 
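+ * (The REGISTER message sent below carries the generated descriptor along
+ * with the obj's iova, size and offsetof, so the peer has everything it
+ * needs to set up its corresponding import mapping.)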
+ */ + ret = vmap_obj_getref(stream_obj->vmap_h, stream_obj->vmap.type, + stream_obj->vmap.id); + if (ret) { + pr_err("(%s): Failed ref counting an object\n", ctx->ep_name); + fput(filep); + return ret; + } + + /* generate export desc.*/ + peer = &ctx->peer_node; + exp_desc = gen_desc(peer->board_id, peer->soc_id, peer->cntrlr_id, + ctx->ep_id, export_type, stream_obj->vmap.id); + + /*share it with peer for peer for corresponding import.*/ + pr_debug("Exporting descriptor = (%llu)\n", exp_desc); + msg.type = COMM_MSG_TYPE_REGISTER; + msg.u.reg.export_desc = exp_desc; + msg.u.reg.iova = stream_obj->vmap.iova; + msg.u.reg.size = stream_obj->vmap.size; + msg.u.reg.offsetof = stream_obj->vmap.offsetof; + ret = comm_channel_msg_send(ctx->comm_channel_h, &msg); + if (ret) + vmap_obj_putref(stream_obj->vmap_h, stream_obj->vmap.type, + stream_obj->vmap.id); + else + args->out.desc = exp_desc; + + fput(filep); + return ret; +} + +/* implement NVSCIC2C_PCIE_IOCTL_GET_HANDLE call. */ +static int +ioctl_import_obj(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_import_obj_args *args) +{ + int ret = 0; + s32 handle = -1; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + struct node_info_t *local = &ctx->local_node; + enum peer_cpu_t peer_cpu; + + if (args->obj_type != NVSCIC2C_PCIE_OBJ_TYPE_IMPORT) + return -EINVAL; + + /* validate the incoming descriptor.*/ + ret = validate_desc(args->in.desc, local->board_id, local->soc_id, + local->cntrlr_id, ctx->ep_id); + if (ret) { + pr_err("(%s): Invalid descriptor: (%llu) received\n", + ctx->ep_name, args->in.desc); + return ret; + } + + /* + * Import the desc :- create virt. mapping, bind it to a stream_obj + * and create a UMD handle for this stream_obj. + */ + handle = allocate_handle(ctx, args->obj_type, (void *)args); + if (handle < 0) + return handle; + pr_debug("Imported descriptor = (%llu)\n", args->in.desc); + + filep = fget(handle); + if (!filep) + return -ENOMEM; + + stream_obj = filep->private_data; + stream_obj->import_type = get_handle_type_from_desc(args->in.desc); + ret = pci_client_get_peer_aper(ctx->pci_client_h, stream_obj->vmap.offsetof, + stream_obj->vmap.size, &stream_obj->aper); + if (ret) { + pr_err("(%s): PCI Client Get Peer Aper Failed\n", ctx->ep_name); + fput(filep); + return ret; + } + + peer_cpu = pci_client_get_peer_cpu(ctx->pci_client_h); + if (peer_cpu == NVCPU_X86_64) + stream_obj->import_obj_map = ioremap(stream_obj->aper, PAGE_SIZE); + fput(filep); + + args->out.handle = handle; + + return ret; +} + +/* implement NVSCIC2C_PCIE_IOCTL_MAP ioctl call. */ +static int +ioctl_map_obj(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_map_obj_args *args) +{ + int ret = 0; + s32 handle = -1; + + /* + * Create virt. mapping for the user primitive objs - Mem or Sync. + * Bind it to a stream_obj. Create a UMD handle for this stream_obj. + */ + handle = allocate_handle(ctx, args->obj_type, (void *)args); + if (handle < 0) + return handle; + + args->out.handle = handle; + + return ret; +} + +/* implement NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST ioctl call. 
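+ *
+ * Roughly: check the PCIe link, copy-in and validate the user args, take a
+ * pre-allocated copy_request off the free list, take references on every
+ * handle involved, build the eDMA descriptors and submit them
+ * asynchronously; the copy_request is reclaimed in callback_edma_xfer().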
*/ +static int +ioctl_submit_copy_request(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_submit_copy_args *args) +{ + int ret = 0; + struct copy_request *cr = NULL; + edma_xfer_status_t edma_status = EDMA_XFER_FAIL_INVAL_INPUTS; + enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN; + + link = pci_client_query_link_status(ctx->pci_client_h); + if (link != NVSCIC2C_PCIE_LINK_UP) + return -ENOLINK; + + /* copy user-supplied submit-copy args.*/ + ret = copy_args_from_user(ctx, args, &ctx->cr_params); + if (ret) + return ret; + + /* validate the user-supplied handles in flush_range and post-fence.*/ + ret = validate_copy_req_params(ctx, &ctx->cr_params); + if (ret) + return ret; + + /* get one copy-request from the free list.*/ + mutex_lock(&ctx->free_lock); + if (list_empty(&ctx->free_list)) { + /* + * user supplied more than mentioned in max_copy_requests OR + * eDMA async didn't invoke callback when eDMA was done. + */ + mutex_unlock(&ctx->free_lock); + return -EAGAIN; + } + cr = list_first_entry(&ctx->free_list, struct copy_request, node); + list_del(&cr->node); + mutex_unlock(&ctx->free_lock); + + /* + * To support out-of-order free and copy-requets when eDMA is in async + * mode, cache all the handles from the copy-submit params and increment + * their reference count before eDMA ops. Post eDMA, decrement the + * reference, thereby, when during in-progress eDMA, free() is received + * for the same set of handles, the handles would be marked for deletion + * but doesn't actually get deleted. + */ + ret = cache_copy_request_handles(&ctx->cr_params, cr); + if (ret) + goto reclaim_cr; + + cr->peer_cpu = pci_client_get_peer_cpu(ctx->pci_client_h); + /* generate eDMA descriptors from flush_ranges, remote_post_fences.*/ + ret = prepare_edma_desc(ctx->drv_mode, &ctx->cr_params, cr->edma_desc, + &cr->num_edma_desc, cr->peer_cpu); + if (ret) { + release_copy_request_handles(cr); + goto reclaim_cr; + } + + /* schedule asynchronous eDMA.*/ + atomic_inc(&ctx->transfer_count); + edma_status = schedule_edma_xfer(ctx->edma_h, (void *)cr, + cr->num_edma_desc, cr->edma_desc); + if (edma_status != EDMA_XFER_SUCCESS) { + ret = -EIO; + atomic_dec(&ctx->transfer_count); + release_copy_request_handles(cr); + goto reclaim_cr; + } + + return ret; + +reclaim_cr: + mutex_lock(&ctx->free_lock); + list_add_tail(&cr->node, &ctx->free_list); + mutex_unlock(&ctx->free_lock); + return ret; +} + +/* implement NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS ioctl call. 
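+ *
+ * One-shot per endpoint context: the limits may only be set once, and this
+ * also pre-allocates max_copy_requests worst-case-sized copy_request
+ * structures onto the free list consumed by submit-copy.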
*/ +static int +ioctl_set_max_copy_requests(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_max_copy_args *args) +{ + int ret = 0; + u32 i = 0; + struct copy_request *cr = NULL; + struct list_head *curr = NULL, *next = NULL; + + if (WARN_ON(!args->max_copy_requests || + !args->max_flush_ranges || + !args->max_post_fences)) + return -EINVAL; + + /* limits already set.*/ + if (WARN_ON(ctx->cr_limits.max_copy_requests || + ctx->cr_limits.max_flush_ranges || + ctx->cr_limits.max_post_fences)) + return -EINVAL; + + ctx->cr_limits.max_copy_requests = args->max_copy_requests; + ctx->cr_limits.max_flush_ranges = args->max_flush_ranges; + ctx->cr_limits.max_post_fences = args->max_post_fences; + + /* allocate one submit-copy params.*/ + ret = allocate_copy_req_params(ctx, &ctx->cr_params); + if (ret) { + pr_err("Failed to allocate submit-copy params\n"); + goto clean_up; + } + + /* allocate the maximum outstanding copy requests we can have.*/ + for (i = 0; i < ctx->cr_limits.max_copy_requests; i++) { + cr = NULL; + ret = allocate_copy_request(ctx, &cr); + if (ret) { + pr_err("Failed to allocate copy request\n"); + goto clean_up; + } + + mutex_lock(&ctx->free_lock); + list_add(&cr->node, &ctx->free_list); + mutex_unlock(&ctx->free_lock); + } + + return ret; + +clean_up: + mutex_unlock(&ctx->free_lock); + list_for_each_safe(curr, next, &ctx->free_list) { + cr = list_entry(curr, struct copy_request, node); + list_del(curr); + free_copy_request(&cr); + } + mutex_unlock(&ctx->free_lock); + + free_copy_req_params(&ctx->cr_params); + + return ret; +} + +int +stream_extension_ioctl(void *stream_ext_h, unsigned int cmd, void *args) +{ + int ret = 0; + struct stream_ext_ctx_t *ctx = NULL; + + if (WARN_ON(!stream_ext_h || !args)) + return -EINVAL; + + ctx = (struct stream_ext_ctx_t *)stream_ext_h; + switch (cmd) { + case NVSCIC2C_PCIE_IOCTL_MAP: + ret = ioctl_map_obj + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_map_obj_args *)args); + break; + case NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN: + ret = ioctl_export_obj + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_export_obj_args *)args); + break; + case NVSCIC2C_PCIE_IOCTL_GET_HANDLE: + ret = ioctl_import_obj + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_import_obj_args *)args); + break; + case NVSCIC2C_PCIE_IOCTL_FREE: + ret = ioctl_free_obj + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_free_obj_args *)args); + break; + case NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST: + ret = ioctl_submit_copy_request + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_submit_copy_args *)args); + break; + case NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS: + ret = ioctl_set_max_copy_requests + ((struct stream_ext_ctx_t *)ctx, + (struct nvscic2c_pcie_max_copy_args *)args); + break; + default: + pr_err("(%s): unrecognised nvscic2c-pcie ioclt cmd: 0x%x\n", + ctx->ep_name, cmd); + ret = -ENOTTY; + break; + } + return ret; +} + +int +stream_extension_init(struct stream_ext_params *params, void **stream_ext_h) +{ + int ret = 0; + struct stream_ext_ctx_t *ctx = NULL; + + if (WARN_ON(!params || !stream_ext_h || *stream_ext_h)) + return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return -ENOMEM; + + ctx->drv_mode = params->drv_mode; + ctx->ep_id = params->ep_id; + ctx->host1x_pdev = params->host1x_pdev; + ctx->edma_h = params->edma_h; + ctx->vmap_h = params->vmap_h; + ctx->pci_client_h = params->pci_client_h; + ctx->comm_channel_h = params->comm_channel_h; + strscpy(ctx->ep_name, params->ep_name, NAME_MAX); 
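+	/* keep by-value copies of the local and peer node identities.*/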
+ memcpy(&ctx->local_node, params->local_node, sizeof(ctx->local_node)); + memcpy(&ctx->peer_node, params->peer_node, sizeof(ctx->peer_node)); + + ctx->host1x = platform_get_drvdata(ctx->host1x_pdev); + if (!ctx->host1x) { + ret = -EINVAL; + pr_err("Could not get host1x handle from host1x_pdev."); + goto err; + } + + /* copy operations.*/ + mutex_init(&ctx->free_lock); + INIT_LIST_HEAD(&ctx->free_list); + atomic_set(&ctx->transfer_count, 0); + init_waitqueue_head(&ctx->transfer_waitq); + + /* bookkeeping of stream objs. */ + INIT_LIST_HEAD(&ctx->obj_list); + + *stream_ext_h = (void *)ctx; + + return 0; +err: + kfree(ctx); + return ret; +} + +#define MAX_TRANSFER_TIMEOUT_MSEC (5000) +void +stream_extension_deinit(void **stream_ext_h) +{ + long ret = 0; + struct file *filep = NULL; + struct copy_request *cr = NULL; + struct stream_ext_obj *stream_obj = NULL; + struct list_head *curr = NULL, *next = NULL; + struct stream_ext_ctx_t *ctx = (struct stream_ext_ctx_t *)*stream_ext_h; + + if (!ctx) + return; + + /* wait for eDMA/copy(ies) to complete/abort. */ + ret = + wait_event_timeout(ctx->transfer_waitq, + !(atomic_read(&ctx->transfer_count)), + msecs_to_jiffies(MAX_TRANSFER_TIMEOUT_MSEC)); + if (ret <= 0) + pr_err("(%s): timed-out waiting for eDMA callbacks to return\n", + ctx->ep_name); + + mutex_lock(&ctx->free_lock); + list_for_each_safe(curr, next, &ctx->free_list) { + cr = list_entry(curr, struct copy_request, node); + list_del(curr); + free_copy_request(&cr); + } + mutex_unlock(&ctx->free_lock); + free_copy_req_params(&ctx->cr_params); + mutex_destroy(&ctx->free_lock); + + /* + * clean-up the non freed stream objs. Descriptor shall be freed when + * application exits. + */ + list_for_each_safe(curr, next, &ctx->obj_list) { + stream_obj = list_entry(curr, struct stream_ext_obj, node); + filep = fget(stream_obj->handle); + if (filep) { + filep->private_data = NULL; + fput(filep); + } + list_del(curr); + stream_obj->marked_for_del = true; + kref_put(&stream_obj->refcount, streamobj_free); + } + + kfree(ctx); + *stream_ext_h = NULL; +} + +static int +allocate_handle(struct stream_ext_ctx_t *ctx, enum nvscic2c_pcie_obj_type type, + void *ioctl_args) +{ + int ret = 0; + s32 handle = -1; + struct stream_ext_obj *stream_obj = NULL; + struct vmap_obj_map_params vmap_params = {0}; + struct vmap_obj_attributes vmap_attrib = {0}; + + /* one of the two below would apply.*/ + struct nvscic2c_pcie_map_obj_args *map_args = + (struct nvscic2c_pcie_map_obj_args *)ioctl_args; + struct nvscic2c_pcie_import_obj_args *import_args = + (struct nvscic2c_pcie_import_obj_args *)ioctl_args; + + /* create pcie virtual mapping of the obj.*/ + switch (type) { + case NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM: + vmap_params.type = VMAP_OBJ_TYPE_MEM; + vmap_params.u.memobj.mngd = VMAP_MNGD_DEV; + vmap_params.u.memobj.prot = VMAP_OBJ_PROT_READ; + vmap_params.u.memobj.fd = map_args->in.fd; + break; + case NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM: + vmap_params.type = VMAP_OBJ_TYPE_MEM; + vmap_params.u.memobj.mngd = VMAP_MNGD_CLIENT; + vmap_params.u.memobj.prot = VMAP_OBJ_PROT_WRITE; + vmap_params.u.memobj.fd = map_args->in.fd; + break; + case NVSCIC2C_PCIE_OBJ_TYPE_LOCAL_SYNC: + vmap_params.type = VMAP_OBJ_TYPE_SYNC; + vmap_params.u.syncobj.pin_reqd = false; + vmap_params.u.syncobj.fd = map_args->in.fd; + vmap_params.u.syncobj.id = map_args->in.id; + break; + case NVSCIC2C_PCIE_OBJ_TYPE_REMOTE_SYNC: + vmap_params.type = VMAP_OBJ_TYPE_SYNC; + vmap_params.u.syncobj.pin_reqd = true; + vmap_params.u.syncobj.mngd = VMAP_MNGD_CLIENT; + 
vmap_params.u.syncobj.prot = VMAP_OBJ_PROT_WRITE; + vmap_params.u.syncobj.fd = map_args->in.fd; + vmap_params.u.syncobj.id = map_args->in.id; + break; + case NVSCIC2C_PCIE_OBJ_TYPE_IMPORT: + vmap_params.type = VMAP_OBJ_TYPE_IMPORT; + vmap_params.u.importobj.export_desc = import_args->in.desc; + break; + default: + pr_err("Incorrect NVSCIC2C_IOCTL_MAP params\n"); + return -EINVAL; + } + ret = vmap_obj_map(ctx->vmap_h, &vmap_params, &vmap_attrib); + if (ret) { + if (ret == -EAGAIN) + pr_info("Failed to map obj of type: (%d)\n", type); + else + pr_err("Failed to map obj of type: (%d)\n", type); + return ret; + } + + /* bind the pcie virt. mapping to a streaming obj.*/ + stream_obj = kzalloc(sizeof(*stream_obj), GFP_KERNEL); + if (WARN_ON(!stream_obj)) { + vmap_obj_unmap(ctx->vmap_h, vmap_attrib.type, vmap_attrib.id); + return -ENOMEM; + } + + /* + * allocate a UMD handle for this streaming_obj. + * O_RDWR is required only for ImportedSyncObjs mmap() from user-space. + */ + handle = anon_inode_getfd("nvscic2c-pcie-stream-ext", &fops_default, + stream_obj, (O_RDWR | O_CLOEXEC)); + if (handle < 0) { + pr_err("(%s): Failed to get stream obj handle\n", ctx->ep_name); + vmap_obj_unmap(ctx->vmap_h, vmap_attrib.type, vmap_attrib.id); + kfree(stream_obj); + return -EFAULT; + } + + list_add_tail(&stream_obj->node, &ctx->obj_list); + stream_obj->handle = handle; + stream_obj->vmap_h = ctx->vmap_h; + stream_obj->type = type; + stream_obj->soc_id = ctx->local_node.soc_id; + stream_obj->cntrlr_id = ctx->local_node.cntrlr_id; + stream_obj->ep_id = ctx->ep_id; + memcpy(&stream_obj->vmap, &vmap_attrib, sizeof(vmap_attrib)); + kref_init(&stream_obj->refcount); + + return handle; +} + +static edma_xfer_status_t +schedule_edma_xfer(void *edma_h, void *priv, u64 num_desc, + struct tegra_pcie_edma_desc *desc) +{ + struct tegra_pcie_edma_xfer_info info = {0}; + + if (WARN_ON(!num_desc || !desc)) + return -EINVAL; + + info.type = EDMA_XFER_WRITE; + info.channel_num = 0; // no use-case to use all WR channels yet. 
+ info.desc = desc; + info.nents = num_desc; + info.complete = callback_edma_xfer; + info.priv = priv; + + return tegra_pcie_edma_submit_xfer(edma_h, &info); +} + +/* Callback with each async eDMA submit xfer.*/ +static void +callback_edma_xfer(void *priv, edma_xfer_status_t status, + struct tegra_pcie_edma_desc *desc) +{ + struct copy_request *cr = (struct copy_request *)priv; + + mutex_lock(&cr->ctx->free_lock); + /* increment num_local_fences.*/ + if (status == EDMA_XFER_SUCCESS) { + /* X86 remote end fences are signaled through CPU */ + if (cr->peer_cpu == NVCPU_X86_64) + signal_remote_post_fences(cr); + + /* Signal local fences for Tegra*/ + signal_local_post_fences(cr); + } + + /* releases the references of the cubmit-copy handles.*/ + release_copy_request_handles(cr); + + /* reclaim the copy_request for reuse.*/ + list_add_tail(&cr->node, &cr->ctx->free_list); + mutex_unlock(&cr->ctx->free_lock); + + if (atomic_dec_and_test(&cr->ctx->transfer_count)) + wake_up_all(&cr->ctx->transfer_waitq); +} + +static int +prepare_edma_desc(enum drv_mode_t drv_mode, struct copy_req_params *params, + struct tegra_pcie_edma_desc *desc, u64 *num_desc, enum peer_cpu_t peer_cpu) +{ + u32 i = 0; + int ret = 0; + u32 iter = 0; + s32 handle = -1; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + struct nvscic2c_pcie_flush_range *flush_range = NULL; + phys_addr_t dummy_addr = 0x0; + + *num_desc = 0; + for (i = 0; i < params->num_flush_ranges; i++) { + flush_range = ¶ms->flush_ranges[i]; + + filep = fget(flush_range->src_handle); + stream_obj = filep->private_data; + desc[iter].src = (stream_obj->vmap.iova + flush_range->offset); + dummy_addr = stream_obj->vmap.iova; + fput(filep); + + filep = fget(flush_range->dst_handle); + stream_obj = filep->private_data; + if (drv_mode == DRV_MODE_EPC) + desc[iter].dst = stream_obj->aper; + else + desc[iter].dst = stream_obj->vmap.iova; + desc[iter].dst += flush_range->offset; + fput(filep); + + desc[iter].sz = flush_range->size; + iter++; + } + /* With Orin as remote end, the remote fence signaling is done using DMA + * With X86 as remote end, the remote fence signaling is done using CPU + */ + if (peer_cpu == NVCPU_ORIN) { + for (i = 0; i < params->num_remote_post_fences; i++) { + handle = params->remote_post_fences[i]; + desc[iter].src = dummy_addr; + + filep = fget(handle); + stream_obj = filep->private_data; + if (drv_mode == DRV_MODE_EPC) + desc[iter].dst = stream_obj->aper; + else + desc[iter].dst = stream_obj->vmap.iova; + + fput(filep); + + desc[iter].sz = 4; + iter++; + } + } + *num_desc += iter; + return ret; +} + +/* this is post eDMA path, must be done with references still taken.*/ +static void +signal_local_post_fences(struct copy_request *cr) +{ + u32 i = 0; + struct host1x_syncpt *sp = NULL; + struct stream_ext_obj *stream_obj = NULL; + + for (i = 0; i < cr->num_local_post_fences; i++) { + stream_obj = cr->local_post_fences[i]; + /* + * Use noref API to fetch host1x_syncpt as ref is already taken + * while pinning and syncpoint won't be freed until we get + * EDMA callback for all submitted jobs. + */ + sp = host1x_syncpt_get_by_id_noref(cr->ctx->host1x, + stream_obj->vmap.syncpt_id); + if (sp) + (void)host1x_syncpt_incr(sp); + } +} + +static void +signal_remote_post_fences(struct copy_request *cr) +{ + u32 i = 0; + struct stream_ext_obj *stream_obj = NULL; + /* Dummy read operation is done on the imported buffer object to ensure + * coherence of data on Vidmem of GA100 dGPU, which is connected as an EP to X86. 
+ * This is needed as Ampere architecture doesn't support coherence of Write after + * Write operation and the dummy read of 4 bytes ensures the data is reconciled in + * vid-memory when the consumer waiting on a sysmem semaphore is unblocked. + */ + for (i = 0; i < cr->num_remote_buf_objs; i++) { + stream_obj = cr->remote_buf_objs[i]; + (void)readl(stream_obj->import_obj_map); + } + for (i = 0; i < cr->num_remote_post_fences; i++) { + stream_obj = cr->remote_post_fences[i]; + writeq(cr->remote_post_fence_values[i], stream_obj->import_obj_map); + } +} + +static int +release_copy_request_handles(struct copy_request *cr) +{ + u32 i = 0; + struct stream_ext_obj *stream_obj = NULL; + + for (i = 0; i < cr->num_handles; i++) { + stream_obj = cr->handles[i]; + kref_put(&stream_obj->refcount, streamobj_free); + } + + return 0; +} + +static int +cache_copy_request_handles(struct copy_req_params *params, + struct copy_request *cr) +{ + u32 i = 0; + s32 handle = -1; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + + cr->num_handles = 0; + cr->num_local_post_fences = 0; + cr->num_remote_post_fences = 0; + cr->num_remote_buf_objs = 0; + for (i = 0; i < params->num_local_post_fences; i++) { + handle = params->local_post_fences[i]; + filep = fget(handle); + stream_obj = filep->private_data; + kref_get(&stream_obj->refcount); + cr->handles[cr->num_handles] = stream_obj; + cr->num_handles++; + /* collect all local post fences separately for nvhost incr.*/ + cr->local_post_fences[cr->num_local_post_fences] = stream_obj; + cr->num_local_post_fences++; + fput(filep); + } + for (i = 0; i < params->num_remote_post_fences; i++) { + handle = params->remote_post_fences[i]; + filep = fget(handle); + stream_obj = filep->private_data; + kref_get(&stream_obj->refcount); + cr->handles[cr->num_handles] = stream_obj; + cr->num_handles++; + cr->remote_post_fence_values[i] = params->remote_post_fence_values[i]; + cr->remote_post_fences[cr->num_remote_post_fences] = stream_obj; + cr->num_remote_post_fences++; + fput(filep); + } + for (i = 0; i < params->num_flush_ranges; i++) { + handle = params->flush_ranges[i].src_handle; + filep = fget(handle); + stream_obj = filep->private_data; + kref_get(&stream_obj->refcount); + cr->handles[cr->num_handles] = stream_obj; + cr->num_handles++; + fput(filep); + + handle = params->flush_ranges[i].dst_handle; + filep = fget(handle); + stream_obj = filep->private_data; + kref_get(&stream_obj->refcount); + cr->handles[cr->num_handles] = stream_obj; + cr->num_handles++; + + cr->remote_buf_objs[cr->num_remote_buf_objs] = stream_obj; + cr->num_remote_buf_objs++; + fput(filep); + } + + return 0; +} + +static int +validate_handle(struct stream_ext_ctx_t *ctx, s32 handle, + enum nvscic2c_pcie_obj_type type) +{ + int ret = -EINVAL; + struct stream_ext_obj *stream_obj = NULL; + struct file *filep = fget(handle); + + if (!filep) + goto exit; + + if (filep->f_op != &fops_default) + goto err; + + stream_obj = filep->private_data; + if (!stream_obj) + goto err; + + if (stream_obj->marked_for_del) + goto err; + + if (stream_obj->soc_id != ctx->local_node.soc_id || + stream_obj->cntrlr_id != ctx->local_node.cntrlr_id || + stream_obj->ep_id != ctx->ep_id) + goto err; + + if (stream_obj->type != type) + goto err; + + /* okay.*/ + ret = 0; +err: + fput(filep); +exit: + return ret; +} + +static int +validate_import_handle(struct stream_ext_ctx_t *ctx, s32 handle, + u32 import_type) +{ + int ret = 0; + struct stream_ext_obj *stream_obj = NULL; + struct file *filep = NULL; + + ret = 
validate_handle(ctx, handle, NVSCIC2C_PCIE_OBJ_TYPE_IMPORT); + if (ret) + return ret; + + filep = fget(handle); + stream_obj = filep->private_data; + if (stream_obj->import_type != import_type) { + fput(filep); + return -EINVAL; + } + fput(filep); + + return ret; +} + +static int +validate_flush_range(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_flush_range *flush_range) +{ + int ret = 0; + struct file *filep = NULL; + struct stream_ext_obj *stream_obj = NULL; + + if (flush_range->size <= 0) + return -EINVAL; + + /* eDMA expects u32 datatype.*/ + if (flush_range->size > U32_MAX) + return -EINVAL; + + if (flush_range->size & 0x3) + return -EINVAL; + + if (flush_range->offset & 0x3) + return -EINVAL; + + ret = validate_handle(ctx, flush_range->src_handle, + NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM); + if (ret) + return ret; + + ret = validate_import_handle(ctx, flush_range->dst_handle, + STREAM_OBJ_TYPE_MEM); + if (ret) + return ret; + + filep = fget(flush_range->src_handle); + stream_obj = filep->private_data; + if ((flush_range->offset + flush_range->size) > stream_obj->vmap.size) { + fput(filep); + return -EINVAL; + } + fput(filep); + + filep = fget(flush_range->dst_handle); + stream_obj = filep->private_data; + if ((flush_range->offset + flush_range->size) > stream_obj->vmap.size) { + fput(filep); + return -EINVAL; + } + fput(filep); + + return 0; +} + +static int +validate_copy_req_params(struct stream_ext_ctx_t *ctx, + struct copy_req_params *params) +{ + u32 i = 0; + int ret = 0; + + /* for each local post-fence.*/ + for (i = 0; i < params->num_local_post_fences; i++) { + s32 handle = 0; + + handle = params->local_post_fences[i]; + ret = validate_handle(ctx, handle, + NVSCIC2C_PCIE_OBJ_TYPE_LOCAL_SYNC); + if (ret) + return ret; + } + /* for each remote post-fence.*/ + for (i = 0; i < params->num_remote_post_fences; i++) { + s32 handle = 0; + + handle = params->remote_post_fences[i]; + ret = validate_import_handle(ctx, handle, STREAM_OBJ_TYPE_SYNC); + if (ret) + return ret; + } + + /* for each flush-range.*/ + for (i = 0; i < params->num_flush_ranges; i++) { + struct nvscic2c_pcie_flush_range *flush_range = NULL; + + flush_range = ¶ms->flush_ranges[i]; + ret = validate_flush_range(ctx, flush_range); + if (ret) + return ret; + } + + return ret; +} + +static int +copy_args_from_user(struct stream_ext_ctx_t *ctx, + struct nvscic2c_pcie_submit_copy_args *args, + struct copy_req_params *params) +{ + int ret = 0; + + if (WARN_ON(!args->num_local_post_fences || + !args->num_flush_ranges || + !args->num_remote_post_fences)) + return -EINVAL; + + if ((args->num_local_post_fences + args->num_remote_post_fences) > + ctx->cr_limits.max_post_fences) + return -EINVAL; + if (args->num_flush_ranges > ctx->cr_limits.max_flush_ranges) + return -EINVAL; + + params->num_local_post_fences = args->num_local_post_fences; + params->num_remote_post_fences = args->num_remote_post_fences; + params->num_flush_ranges = args->num_flush_ranges; + + ret = copy_from_user(params->local_post_fences, + (void __user *)args->local_post_fences, + (params->num_local_post_fences * sizeof(s32))); + if (ret < 0) + return -EFAULT; + + ret = copy_from_user(params->remote_post_fences, + (void __user *)args->remote_post_fences, + (params->num_remote_post_fences * sizeof(s32))); + if (ret < 0) + return -EFAULT; + + ret = copy_from_user(params->remote_post_fence_values, + (void __user *)args->remote_post_fence_values, + (params->num_remote_post_fences * sizeof(u64))); + if (ret < 0) + return -EFAULT; + + ret = 
copy_from_user(params->flush_ranges, + (void __user *)args->flush_ranges, + (params->num_flush_ranges * + sizeof(struct nvscic2c_pcie_flush_range))); + if (ret < 0) + return -EFAULT; + + return 0; +} + +static void +free_copy_request(struct copy_request **copy_request) +{ + struct copy_request *cr = *copy_request; + + if (!cr) + return; + + kfree(cr->local_post_fences); + kfree(cr->remote_post_fences); + kfree(cr->remote_buf_objs); + kfree(cr->remote_post_fence_values); + kfree(cr->edma_desc); + kfree(cr->handles); + kfree(cr); + + *copy_request = NULL; +} + +static int +allocate_copy_request(struct stream_ext_ctx_t *ctx, + struct copy_request **copy_request) +{ + int ret = 0; + struct copy_request *cr = NULL; + + /*worst-case allocation for each copy request.*/ + + cr = kzalloc(sizeof(*cr), GFP_KERNEL); + if (WARN_ON(!cr)) { + ret = -ENOMEM; + goto err; + } + cr->ctx = ctx; + + /* flush range has two handles: src, dst + all possible post_fences.*/ + cr->handles = kzalloc((sizeof(*cr->handles) * + ((2 * ctx->cr_limits.max_flush_ranges) + + (ctx->cr_limits.max_post_fences))), + GFP_KERNEL); + if (WARN_ON(!cr->handles)) { + ret = -ENOMEM; + goto err; + } + + /* + * edma_desc shall include flush_range + worst-case all post-fences + * (all max_post_fences could be remote_post_fence which need be eDMAd). + */ + cr->edma_desc = kzalloc((sizeof(*cr->edma_desc) * + (ctx->cr_limits.max_flush_ranges + + ctx->cr_limits.max_post_fences)), + GFP_KERNEL); + if (WARN_ON(!cr->edma_desc)) { + ret = -ENOMEM; + goto err; + } + + /* OR all max_post_fences could be local_post_fence. */ + cr->local_post_fences = kzalloc((sizeof(*cr->local_post_fences) * + ctx->cr_limits.max_post_fences), + GFP_KERNEL); + if (WARN_ON(!cr->local_post_fences)) { + ret = -ENOMEM; + goto err; + } + cr->remote_post_fences = kzalloc((sizeof(*cr->remote_post_fences) * + ctx->cr_limits.max_post_fences), + GFP_KERNEL); + if (WARN_ON(!cr->remote_post_fences)) { + ret = -ENOMEM; + goto err; + } + + cr->remote_buf_objs = kzalloc((sizeof(*cr->remote_buf_objs) * + ctx->cr_limits.max_flush_ranges), + GFP_KERNEL); + if (WARN_ON(!cr->remote_buf_objs)) { + ret = -ENOMEM; + goto err; + } + + cr->remote_post_fence_values = + kzalloc((sizeof(*cr->remote_post_fence_values) * + ctx->cr_limits.max_post_fences), + GFP_KERNEL); + if (WARN_ON(!cr->remote_post_fence_values)) { + ret = -ENOMEM; + goto err; + } + + *copy_request = cr; + return ret; +err: + free_copy_request(&cr); + return ret; +} + +static void +free_copy_req_params(struct copy_req_params *params) +{ + if (!params) + return; + + kfree(params->flush_ranges); + params->flush_ranges = NULL; + kfree(params->local_post_fences); + params->local_post_fences = NULL; + kfree(params->remote_post_fences); + params->remote_post_fences = NULL; + kfree(params->remote_post_fence_values); + params->remote_post_fence_values = NULL; +} + +static int +allocate_copy_req_params(struct stream_ext_ctx_t *ctx, + struct copy_req_params *params) +{ + int ret = 0; + + /*worst-case allocation for each.*/ + + params->flush_ranges = kzalloc((sizeof(*params->flush_ranges) * + ctx->cr_limits.max_flush_ranges), + GFP_KERNEL); + if (WARN_ON(!params->flush_ranges)) { + ret = -ENOMEM; + goto err; + } + params->local_post_fences = + kzalloc((sizeof(*params->local_post_fences) * + ctx->cr_limits.max_post_fences), + GFP_KERNEL); + if (WARN_ON(!params->local_post_fences)) { + ret = -ENOMEM; + goto err; + } + params->remote_post_fences = + kzalloc((sizeof(*params->remote_post_fences) * + ctx->cr_limits.max_post_fences), + 
GFP_KERNEL); + if (WARN_ON(!params->remote_post_fences)) { + ret = -ENOMEM; + goto err; + } + params->remote_post_fence_values = + kzalloc((sizeof(*params->remote_post_fence_values) * + ctx->cr_limits.max_post_fences), + GFP_KERNEL); + if (WARN_ON(!params->remote_post_fence_values)) { + ret = -ENOMEM; + goto err; + } + + return ret; +err: + free_copy_req_params(params); + return ret; +} diff --git a/drivers/misc/nvscic2c-pcie/stream-extensions.h b/drivers/misc/nvscic2c-pcie/stream-extensions.h new file mode 100644 index 00000000..07fc5ae4 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/stream-extensions.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +/* + * Internal to gos-nvscic2c module. This file is not supposed to be included + * by any other external modules. + */ +#ifndef __STREAM_EXTENSIONS_H__ +#define __STREAM_EXTENSIONS_H__ + +#include + +#include "common.h" + +/* forward declaration. */ +struct driver_ctx_t; + +/* params to instantiate a stream-extension instance.*/ +struct stream_ext_params { + struct node_info_t *local_node; + struct node_info_t *peer_node; + u32 ep_id; + char *ep_name; + struct platform_device *host1x_pdev; + enum drv_mode_t drv_mode; + void *pci_client_h; + void *comm_channel_h; + void *vmap_h; + void *edma_h; +}; + +int +stream_extension_ioctl(void *stream_ext_h, unsigned int cmd, void *arg); + +int +stream_extension_init(struct stream_ext_params *params, void **handle); + +void +stream_extension_deinit(void **handle); +#endif //__STREAM_EXTENSIONS_H__ diff --git a/drivers/misc/nvscic2c-pcie/vmap-internal.h b/drivers/misc/nvscic2c-pcie/vmap-internal.h new file mode 100644 index 00000000..1348d93c --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/vmap-internal.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __VMAP_INTERNAL_H__ +#define __VMAP_INTERNAL_H__ + +#include +#include +#include + +#include "common.h" +#include "vmap.h" + +/* forward declaration. */ +struct vmap_ctx_t; + +struct memobj_pin_t { + /* Input param fd -> dma_buf to be mapped.*/ + struct dma_buf *dmabuf; + + enum vmap_mngd mngd; + enum vmap_obj_prot prot; + enum vmap_obj_type type; + + /* Input dmabuf mapped to pci-dev(dev mngd) or dummy dev(client mngd).*/ + struct dma_buf_attachment *attach; + struct sg_table *sgt; + enum dma_data_direction dir; + + /* + * [OUT]contiguous iova region obtained from client (iova-mngr). + * which input dmabuf is mapped to. 
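+ * (Client-managed mappings only: memobj_clientmngd_pin() reserves the block
+ * via pci_client_alloc_iova() and then maps each scatterlist entry into it
+ * with pci_client_map_addr().)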
+ */ + void *iova_block_h; + struct vmap_obj_attributes attrib; + + /* + * [OUT] + * Per scatter-list nent mapping - used during free. + * Used for client-managed map only. + */ + u32 nr_nents; + struct iova_nent { + u64 iova; + size_t len; + bool mapped_iova; + } *nents; +}; + +struct syncobj_pin_t { + s32 fd; + u32 syncpt_id; + struct host1x_syncpt *sp; + phys_addr_t phy_addr; + + enum vmap_mngd mngd; + enum vmap_obj_prot prot; + enum vmap_obj_type type; + + /* local sync objs do not require pinning to pcie address space.*/ + bool pin_reqd; + + /* + * [OUT]contiguous iova region obtained from client (iova-mngr) + * which syncpoint shim aper is mapped to. + */ + void *iova_block_h; + struct vmap_obj_attributes attrib; + bool mapped_iova; +}; + +struct importobj_reg_t { + /* + * export descriptor and whereabouts of exported obj + * as received from remote end. + */ + u64 export_desc; + + /* times exported by remote, imported by local.*/ + u32 nr_export; + u32 nr_import; + + struct vmap_obj_attributes attrib; +}; + +/* virtual mapping information for Mem obj.*/ +struct memobj_map_ref { + s32 obj_id; + struct kref refcount; + struct memobj_pin_t pin; + struct vmap_ctx_t *vmap_ctx; +}; + +/* virtual mapping information for Sync obj. */ +struct syncobj_map_ref { + s32 obj_id; + struct kref refcount; + struct syncobj_pin_t pin; + struct vmap_ctx_t *vmap_ctx; +}; + +/* virtual mapping information for Imported obj. */ +struct importobj_map_ref { + s32 obj_id; + struct kref refcount; + struct importobj_reg_t reg; + struct vmap_ctx_t *vmap_ctx; +}; + +/* vmap subunit/abstraction context. */ +struct vmap_ctx_t { + /* pci-client abstraction handle.*/ + void *pci_client_h; + + /* comm-channel abstraction. */ + void *comm_channel_h; + + /* host1x platform device for syncpoint interfaces.*/ + struct platform_device *host1x_pdev; + + /* + * dummy platform device. - This has smmu disabled to get the + * physical addresses of exported Mem objects when using client + * managed mapping. + */ + struct platform_device *dummy_pdev; + bool dummy_pdev_init; + + /* + * Management of Mem/Sync object Ids. + * + * All objects mapped are identified by - pin_id. IDR mechanism + * generates these IDs. We maintain separate book-keeping for + * Mem, Sync and Import objects. The ID shall overalap between + * Mem, Sync and Import objects. + * + * ID is the pinned handle returned to other units. 
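+ * (IDs come from idr_alloc() over the [MEMOBJ_START, MEMOBJ_END),
+ * [SYNCOBJ_START, SYNCOBJ_END) and [IMPORTOBJ_START, IMPORTOBJ_END)
+ * ranges in vmap.c; all start at 1 so that a 0 return from the
+ * idr_for_each() match callbacks means "keep looking".)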
+ */ + struct idr mem_idr; + struct idr sync_idr; + struct idr import_idr; + + /* exclusive access to mem idr.*/ + struct mutex mem_idr_lock; + /* exclusive access to sync idr.*/ + struct mutex sync_idr_lock; + /* exclusive access to import idr.*/ + struct mutex import_idr_lock; +}; + +void +memobj_devmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +int +memobj_devmngd_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +void +memobj_clientmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +int +memobj_clientmngd_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +void +memobj_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +int +memobj_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin); +void +syncobj_clientmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin); +void +syncobj_unpin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin); +int +syncobj_pin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin); + +#endif //__VMAP_INTERNAL_H__ diff --git a/drivers/misc/nvscic2c-pcie/vmap-pin.c b/drivers/misc/nvscic2c-pcie/vmap-pin.c new file mode 100644 index 00000000..76670bbf --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/vmap-pin.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: vmap-pin: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" +#include "module.h" +#include "pci-client.h" +#include "vmap.h" +#include "vmap-internal.h" + +void +memobj_devmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + if (!vmap_ctx) + return; + + if (!pin) + return; + + if (!(IS_ERR_OR_NULL(pin->sgt))) { + dma_buf_unmap_attachment(pin->attach, pin->sgt, pin->dir); + pin->sgt = NULL; + } + + if (!(IS_ERR_OR_NULL(pin->attach))) { + pci_client_dmabuf_detach(vmap_ctx->pci_client_h, + pin->dmabuf, pin->attach); + pin->attach = NULL; + } +} + +int +memobj_devmngd_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + int ret = 0; + u32 sg_index = 0; + struct scatterlist *sg = NULL; + + if (pin->prot == VMAP_OBJ_PROT_WRITE) + pin->dir = DMA_FROM_DEVICE; + else + pin->dir = DMA_TO_DEVICE; + + pin->attach = pci_client_dmabuf_attach(vmap_ctx->pci_client_h, + pin->dmabuf); + if (IS_ERR_OR_NULL(pin->attach)) { + ret = PTR_ERR(pin->attach); + goto err; + } + + pin->sgt = dma_buf_map_attachment(pin->attach, pin->dir); + if (IS_ERR_OR_NULL(pin->sgt)) { + ret = PTR_ERR(pin->sgt); + goto err; + } + + /* dma address (for all nents) are deemed contiguous for smmu=on.*/ + pin->attrib.iova = sg_dma_address(pin->sgt->sgl); + for_each_sg(pin->sgt->sgl, sg, pin->sgt->nents, sg_index) { + pin->attrib.size += sg->length; + } + + /* + * dev mngd used in local mem or remote mem (when exporting from + * Tegra PCIe RP), in both cases, offsetof is not needed. 
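+ * (This path does not reserve a block from the iova-manager, so there is
+ * no offset within that managed region to report.)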
+ */ + pin->attrib.offsetof = 0; + + return ret; +err: + memobj_devmngd_unpin(vmap_ctx, pin); + return ret; +} + +void +memobj_clientmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + u32 i = 0; + + if (!vmap_ctx) + return; + + if (!pin) + return; + + if (pin->nents) { + for (i = 0; i < pin->nr_nents; i++) { + if (pin->nents[i].mapped_iova) { + pci_client_unmap_addr(vmap_ctx->pci_client_h, + pin->nents[i].iova, + pin->nents[i].len); + pin->nents[i].mapped_iova = false; + } + } + kfree(pin->nents); + pin->nents = NULL; + } + + if (pin->iova_block_h) { + pci_client_free_iova(vmap_ctx->pci_client_h, + &pin->iova_block_h); + pin->iova_block_h = NULL; + } + + if (!(IS_ERR_OR_NULL(pin->sgt))) { + dma_buf_unmap_attachment(pin->attach, pin->sgt, pin->dir); + pin->sgt = NULL; + } + + if (!(IS_ERR_OR_NULL(pin->attach))) { + dma_buf_detach(pin->dmabuf, pin->attach); + pin->attach = NULL; + } +} + +int +memobj_clientmngd_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + int ret = 0; + u64 iova = 0; + u32 sg_index = 0; + int prot = IOMMU_WRITE; + struct scatterlist *sg = NULL; + + if (pin->prot == VMAP_OBJ_PROT_WRITE) { + prot = IOMMU_WRITE; + pin->dir = DMA_FROM_DEVICE; + } else { + prot = IOMMU_READ; + pin->dir = DMA_TO_DEVICE; + } + + /* + * pin to dummy device (which has smmu disabled) to get scatter-list + * of phys addr. + */ + pin->attach = dma_buf_attach(pin->dmabuf, &vmap_ctx->dummy_pdev->dev); + if (IS_ERR_OR_NULL(pin->attach)) { + ret = PTR_ERR(pin->attach); + pr_err("client_mngd dma_buf_attach failed\n"); + goto err; + } + pin->sgt = dma_buf_map_attachment(pin->attach, pin->dir); + if (IS_ERR_OR_NULL(pin->sgt)) { + ret = PTR_ERR(pin->sgt); + pr_err("client_mngd dma_buf_attachment failed\n"); + goto err; + } + + for_each_sg(pin->sgt->sgl, sg, pin->sgt->nents, sg_index) + pin->attrib.size += sg->length; + + /* get one contiguous iova.*/ + ret = pci_client_alloc_iova(vmap_ctx->pci_client_h, pin->attrib.size, + &pin->attrib.iova, &pin->attrib.offsetof, + &pin->iova_block_h); + if (ret) { + pr_err("Failed to reserve iova block of size: (%lu)\n", + pin->attrib.size); + goto err; + } + + /* pin the scatter list to contiguous iova.*/ + pin->nr_nents = pin->sgt->nents; + pin->nents = kzalloc((sizeof(*pin->nents) * pin->nr_nents), GFP_KERNEL); + if (WARN_ON(!pin->nents)) { + ret = -ENOMEM; + goto err; + } + + iova = pin->attrib.iova; + for_each_sg(pin->sgt->sgl, sg, pin->sgt->nents, sg_index) { + phys_addr_t paddr = (phys_addr_t)(sg_phys(sg)); + + pin->nents[sg_index].iova = iova; + pin->nents[sg_index].len = sg->length; + ret = pci_client_map_addr(vmap_ctx->pci_client_h, + pin->nents[sg_index].iova, paddr, + pin->nents[sg_index].len, + (IOMMU_CACHE | prot)); + if (ret < 0) { + pr_err("Failed: to iommu_map sg_nent: (%u), size: (%u)\n", + sg_index, sg->length); + goto err; + } + pin->nents[sg_index].mapped_iova = true; + + /* store information for unmap.*/ + iova += sg->length; + } + + return ret; +err: + memobj_clientmngd_unpin(vmap_ctx, pin); + return ret; +} + +void +memobj_unpin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + if (!vmap_ctx) + return; + + if (!pin) + return; + + if (pin->mngd == VMAP_MNGD_CLIENT) + memobj_clientmngd_unpin(vmap_ctx, pin); + else + memobj_devmngd_unpin(vmap_ctx, pin); + + dma_buf_put(pin->dmabuf); // get_dma_buf(); +} + +int +memobj_pin(struct vmap_ctx_t *vmap_ctx, + struct memobj_pin_t *pin) +{ + int ret = 0; + + /* ref count till we unmap. 
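+ * (Balanced by the dma_buf_put() in memobj_unpin(), including on the error
+ * path below.)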
*/ + get_dma_buf(pin->dmabuf); + + if (pin->mngd == VMAP_MNGD_CLIENT) + ret = memobj_clientmngd_pin(vmap_ctx, pin); + else + ret = memobj_devmngd_pin(vmap_ctx, pin); + + if (ret) + memobj_unpin(vmap_ctx, pin); + + return ret; +} + +void +syncobj_clientmngd_unpin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin) +{ + if (!vmap_ctx) + return; + + if (!pin) + return; + + if (pin->mapped_iova) { + pci_client_unmap_addr(vmap_ctx->pci_client_h, + pin->attrib.iova, pin->attrib.size); + pin->mapped_iova = false; + } + + if (pin->iova_block_h) { + pci_client_free_iova(vmap_ctx->pci_client_h, + &pin->iova_block_h); + pin->iova_block_h = NULL; + } +} + +static int +syncobj_clientmngd_pin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin) +{ + int ret = 0; + + if (pin->prot != VMAP_OBJ_PROT_WRITE) { + pr_err("Pinning syncobj with read access not supported\n"); + return -EOPNOTSUPP; + } + + ret = pci_client_alloc_iova(vmap_ctx->pci_client_h, pin->attrib.size, + &pin->attrib.iova, &pin->attrib.offsetof, + &pin->iova_block_h); + if (ret) { + pr_err("Failed to reserve iova block of size: (%lu)\n", + pin->attrib.size); + goto err; + } + + ret = pci_client_map_addr(vmap_ctx->pci_client_h, pin->attrib.iova, + pin->phy_addr, pin->attrib.size, + (IOMMU_CACHE | IOMMU_WRITE)); + if (ret) { + pr_err("Failed to pin syncpoint physical addr to client iova\n"); + goto err; + } + pin->mapped_iova = true; + + return ret; +err: + syncobj_clientmngd_unpin(vmap_ctx, pin); + return ret; +} + +void +syncobj_unpin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin) +{ + if (!vmap_ctx) + return; + + if (!pin) + return; + + if (pin->pin_reqd) { + if (pin->mngd == VMAP_MNGD_DEV) + return; + + syncobj_clientmngd_unpin(vmap_ctx, pin); + } + + if (pin->sp) { + host1x_syncpt_put(pin->sp); + pin->sp = NULL; + } +} + +int +syncobj_pin(struct vmap_ctx_t *vmap_ctx, + struct syncobj_pin_t *pin) +{ + int ret = 0; + struct host1x *host1x = NULL; + + host1x = platform_get_drvdata(vmap_ctx->host1x_pdev); + if (!host1x) { + ret = -EINVAL; + pr_err("Could not get host1x handle from host1x_pdev."); + goto err; + } + + /* + * Get host1x_syncpt using syncpoint id and fd. + * Takes ref on syncpoint. + */ + pin->sp = tegra_drm_get_syncpt(pin->fd, pin->syncpt_id); + if (IS_ERR_OR_NULL(pin->sp)) { + ret = PTR_ERR(pin->sp); + pr_err("Failed to get syncpoint from id\n"); + goto err; + } + + pin->attrib.syncpt_id = pin->syncpt_id; + pin->attrib.size = SP_MAP_SIZE; + if (pin->pin_reqd) { + pin->phy_addr = get_syncpt_shim_offset(pin->syncpt_id); + /* + * remote/export sync obj are mapped to an iova of client + * choice always and we should not come here for local sync objs + */ + if (pin->mngd == VMAP_MNGD_DEV) { + ret = -EOPNOTSUPP; + goto err; + } + + ret = syncobj_clientmngd_pin(vmap_ctx, pin); + } + +err: + if (ret) + syncobj_unpin(vmap_ctx, pin); + + return ret; +} diff --git a/drivers/misc/nvscic2c-pcie/vmap.c b/drivers/misc/nvscic2c-pcie/vmap.c new file mode 100644 index 00000000..2c3d9261 --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/vmap.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "nvscic2c-pcie: vmap: " fmt + +#include "comm-channel.h" +#include "common.h" +#include "descriptor.h" +#include "module.h" +#include "pci-client.h" +#include "vmap.h" +#include "vmap-internal.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * *_START must be > 0 to avoid interference with idr_for_each(). + */ +#define MEMOBJ_START (1) +#define SYNCOBJ_START (1) +#define IMPORTOBJ_START (1) +#define MEMOBJ_END (MAX_STREAM_MEMOBJS) +#define SYNCOBJ_END (MAX_STREAM_SYNCOBJS) +#define IMPORTOBJ_END (MAX_STREAM_MEMOBJS + MAX_STREAM_SYNCOBJS) + +static int +match_dmabuf(int id, void *entry, void *data) +{ + struct memobj_map_ref *map = (struct memobj_map_ref *)entry; + + if (map->pin.dmabuf == (struct dma_buf *)data) + return id; + + /* 0 shall pick-up next entry.*/ + return 0; +} + +static int +memobj_map(struct vmap_ctx_t *vmap_ctx, + struct vmap_memobj_map_params *params, + struct vmap_obj_attributes *attrib) +{ + int ret = 0; + s32 id_exist = 0; + struct memobj_map_ref *map = NULL; + struct dma_buf *dmabuf = NULL; + + dmabuf = dma_buf_get(params->fd); + if (IS_ERR_OR_NULL(dmabuf)) { + pr_err("Failed to get dma_buf for Mem Fd: (%d)\n", + params->fd); + return -EFAULT; + } + + mutex_lock(&vmap_ctx->mem_idr_lock); + + /* check if the dma_buf is already mapped ? */ + id_exist = idr_for_each(&vmap_ctx->mem_idr, match_dmabuf, &dmabuf); + if (id_exist > 0) + map = idr_find(&vmap_ctx->mem_idr, id_exist); + + if (map) { + /* already mapped.*/ + /* + * requested mapping type != already mapped type. + * e.g: mem obj previously mapped with dev mngd and now + * as client mngd. + */ + if (params->mngd != map->pin.mngd) { + pr_err("Memobj: Already mapped with another mngd\n"); + ret = -EINVAL; + goto err; + } + /* + * add a validation later when rid=sid is enabled, where it + * shall be dev_mngd in both case but dev shall be different. 
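+ * (For now an already-mapped dmabuf is simply reference-counted and its
+ * existing pin attributes are reused.)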
+ */ + kref_get(&map->refcount); + } else { + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (WARN_ON(!map)) { + ret = -ENOMEM; + goto err; + } + + map->vmap_ctx = vmap_ctx; + kref_init(&map->refcount); + map->pin.dmabuf = dmabuf; + map->pin.prot = params->prot; + map->pin.mngd = params->mngd; + map->obj_id = idr_alloc(&vmap_ctx->mem_idr, map, + MEMOBJ_START, MEMOBJ_END, + GFP_KERNEL); + if (map->obj_id <= 0) { + ret = map->obj_id; + pr_err("Failed to idr alloc for mem obj\n"); + kfree(map); + goto err; + } + + /* populates map->pin.attrib within.*/ + ret = memobj_pin(vmap_ctx, &map->pin); + if (ret) { + pr_err("Failed to pin mem obj fd: (%d)\n", params->fd); + idr_remove(&vmap_ctx->mem_idr, map->obj_id); + kfree(map); + goto err; + } + } + + attrib->type = VMAP_OBJ_TYPE_MEM; + attrib->id = map->obj_id; + attrib->iova = map->pin.attrib.iova; + attrib->size = map->pin.attrib.size; + attrib->offsetof = map->pin.attrib.offsetof; +err: + mutex_unlock(&vmap_ctx->mem_idr_lock); + dma_buf_put(dmabuf); //dma_buf_get() + return ret; +} + +/* must be called with idr lock held.*/ +static void +memobj_free(struct kref *kref) +{ + struct memobj_map_ref *map = NULL; + + if (!kref) + return; + + map = container_of(kref, struct memobj_map_ref, refcount); + if (map) { + memobj_unpin(map->vmap_ctx, &map->pin); + idr_remove(&map->vmap_ctx->mem_idr, map->obj_id); + kfree(map); + } +} + +static int +memobj_unmap(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct memobj_map_ref *map = NULL; + + mutex_lock(&vmap_ctx->mem_idr_lock); + map = idr_find(&vmap_ctx->mem_idr, obj_id); + if (!map) { + mutex_unlock(&vmap_ctx->mem_idr_lock); + return -EBADF; + } + + kref_put(&map->refcount, memobj_free); + mutex_unlock(&vmap_ctx->mem_idr_lock); + + return 0; +} + +static int +memobj_putref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + return memobj_unmap(vmap_ctx, obj_id); +} + +static int +memobj_getref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct memobj_map_ref *map = NULL; + + mutex_lock(&vmap_ctx->mem_idr_lock); + map = idr_find(&vmap_ctx->mem_idr, obj_id); + if (WARN_ON(!map)) { + mutex_unlock(&vmap_ctx->mem_idr_lock); + return -EBADF; + } + + kref_get(&map->refcount); + mutex_unlock(&vmap_ctx->mem_idr_lock); + + return 0; +} + +static int +match_syncpt_id(int id, void *entry, void *data) +{ + struct syncobj_map_ref *map = (struct syncobj_map_ref *)entry; + + if (map->pin.syncpt_id == *((u32 *)data)) + return id; + + /* 0 shall pick-up next entry.*/ + return 0; +} + +static int +syncobj_map(struct vmap_ctx_t *vmap_ctx, + struct vmap_syncobj_map_params *params, + struct vmap_obj_attributes *attrib) +{ + int ret = 0; + s32 id_exist = 0; + u32 syncpt_id = 0; + struct syncobj_map_ref *map = NULL; + + syncpt_id = params->id; + mutex_lock(&vmap_ctx->sync_idr_lock); + + /* check if the syncpt is already mapped ? 
*/ + id_exist = idr_for_each(&vmap_ctx->sync_idr, match_syncpt_id, + &syncpt_id); + if (id_exist > 0) + map = idr_find(&vmap_ctx->sync_idr, id_exist); + + if (map) { + /* mapping again a SYNC obj(local or remote) is not permitted.*/ + ret = -EPERM; + goto err; + } else { + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (WARN_ON(!map)) { + ret = -ENOMEM; + goto err; + } + + map->vmap_ctx = vmap_ctx; + kref_init(&map->refcount); + map->obj_id = idr_alloc(&vmap_ctx->sync_idr, map, + SYNCOBJ_START, SYNCOBJ_END, + GFP_KERNEL); + if (map->obj_id <= 0) { + ret = map->obj_id; + pr_err("Failed to idr alloc for sync obj\n"); + kfree(map); + goto err; + } + + /* local syncobjs do not need to be pinned to pcie iova.*/ + map->pin.fd = params->fd; + map->pin.syncpt_id = syncpt_id; + map->pin.pin_reqd = params->pin_reqd; + map->pin.prot = params->prot; + map->pin.mngd = params->mngd; + ret = syncobj_pin(vmap_ctx, &map->pin); + if (ret) { + pr_err("Failed to pin sync obj Id: (%d)\n", + syncpt_id); + idr_remove(&vmap_ctx->sync_idr, map->obj_id); + kfree(map); + goto err; + } + attrib->type = VMAP_OBJ_TYPE_SYNC; + attrib->id = map->obj_id; + attrib->iova = map->pin.attrib.iova; + attrib->size = map->pin.attrib.size; + attrib->offsetof = map->pin.attrib.offsetof; + attrib->syncpt_id = map->pin.attrib.syncpt_id; + } +err: + mutex_unlock(&vmap_ctx->sync_idr_lock); + + return ret; +} + +/* must be called with idr lock held.*/ +static void +syncobj_free(struct kref *kref) +{ + struct syncobj_map_ref *map = NULL; + + if (!kref) + return; + + map = container_of(kref, struct syncobj_map_ref, refcount); + if (map) { + syncobj_unpin(map->vmap_ctx, &map->pin); + idr_remove(&map->vmap_ctx->sync_idr, map->obj_id); + kfree(map); + } +} + +static int +syncobj_unmap(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct syncobj_map_ref *map = NULL; + + mutex_lock(&vmap_ctx->sync_idr_lock); + map = idr_find(&vmap_ctx->sync_idr, obj_id); + if (!map) { + mutex_unlock(&vmap_ctx->sync_idr_lock); + return -EBADF; + } + + kref_put(&map->refcount, syncobj_free); + mutex_unlock(&vmap_ctx->sync_idr_lock); + + return 0; +} + +static int +syncobj_putref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + return syncobj_unmap(vmap_ctx, obj_id); +} + +static int +syncobj_getref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct memobj_map_ref *map = NULL; + + if (!vmap_ctx) + return -EINVAL; + + mutex_lock(&vmap_ctx->sync_idr_lock); + map = idr_find(&vmap_ctx->sync_idr, obj_id); + if (WARN_ON(!map)) { + mutex_unlock(&vmap_ctx->sync_idr_lock); + return -EBADF; + } + + kref_get(&map->refcount); + mutex_unlock(&vmap_ctx->sync_idr_lock); + + return 0; +} + +static int +match_export_desc(int id, void *entry, void *data) +{ + struct importobj_map_ref *map = (struct importobj_map_ref *)entry; + + if (map->reg.export_desc == *((u64 *)data)) + return id; + + /* 0 shall pick-up next entry.*/ + return 0; +} + +static int +importobj_map(struct vmap_ctx_t *vmap_ctx, + struct vmap_importobj_map_params *params, + struct vmap_obj_attributes *attrib) +{ + int ret = 0; + s32 id_exist = 0; + struct importobj_map_ref *map = NULL; + + mutex_lock(&vmap_ctx->import_idr_lock); + + /* check if we have export descriptor from remote already ? 
*/ + id_exist = idr_for_each(&vmap_ctx->import_idr, match_export_desc, + ¶ms->export_desc); + if (id_exist > 0) + map = idr_find(&vmap_ctx->import_idr, id_exist); + + if (!map) { + ret = -EAGAIN; + pr_debug("Failed to find descriptor: (%llu), try again\n", + params->export_desc); + goto err; + } else { + /* importing beyond the export from remote is not permitted.*/ + if (map->reg.nr_import == map->reg.nr_export) { + ret = -EPERM; + goto err; + } + map->reg.nr_import++; + + attrib->type = VMAP_OBJ_TYPE_IMPORT; + attrib->id = map->obj_id; + attrib->iova = map->reg.attrib.iova; + attrib->size = map->reg.attrib.size; + attrib->offsetof = map->reg.attrib.offsetof; + } +err: + mutex_unlock(&vmap_ctx->import_idr_lock); + + return ret; +} + +/* must be called with idr lock held.*/ +static void +importobj_free(struct kref *kref) +{ + struct importobj_map_ref *map = NULL; + + if (!kref) + return; + + map = container_of(kref, struct importobj_map_ref, refcount); + if (map) { + idr_remove(&map->vmap_ctx->import_idr, map->obj_id); + kfree(map); + } +} + +static int +importobj_unmap(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct importobj_map_ref *map = NULL; + struct comm_msg msg = {0}; + + mutex_lock(&vmap_ctx->import_idr_lock); + + map = idr_find(&vmap_ctx->import_idr, obj_id); + if (!map) { + mutex_unlock(&vmap_ctx->import_idr_lock); + return -EINVAL; + } + if (WARN_ON(!map->reg.nr_import)) { + pr_err("Import ObjId: (%d) wasn't imported yet\n", obj_id); + mutex_unlock(&vmap_ctx->import_idr_lock); + return -EINVAL; + } + + /* + * Each import corresponds to an export, if we unmap an imported + * object, it's exported instance is also refcounted. Remote must + * export again for it to be imported on local SoC again. + */ + msg.type = COMM_MSG_TYPE_UNREGISTER; + msg.u.unreg.export_desc = map->reg.export_desc; + msg.u.unreg.iova = map->reg.attrib.iova; + msg.u.unreg.size = map->reg.attrib.size; + msg.u.unreg.offsetof = map->reg.attrib.offsetof; + comm_channel_msg_send(vmap_ctx->comm_channel_h, &msg); + + kref_put(&map->refcount, importobj_free); + mutex_unlock(&vmap_ctx->import_idr_lock); + + return 0; +} + +static int +importobj_putref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + return importobj_unmap(vmap_ctx, obj_id); +} + +static int +importobj_getref(struct vmap_ctx_t *vmap_ctx, s32 obj_id) +{ + struct memobj_map_ref *map = NULL; + + mutex_lock(&vmap_ctx->import_idr_lock); + map = idr_find(&vmap_ctx->import_idr, obj_id); + if (WARN_ON(!map)) { + mutex_unlock(&vmap_ctx->import_idr_lock); + return -EBADF; + } + + kref_get(&map->refcount); + mutex_unlock(&vmap_ctx->import_idr_lock); + + return 0; +} + +int +vmap_obj_map(void *vmap_h, struct vmap_obj_map_params *params, + struct vmap_obj_attributes *attrib) +{ + int ret = 0; + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)vmap_h; + + if (WARN_ON(!vmap_ctx || !params || !attrib)) + return -EINVAL; + + switch (params->type) { + case VMAP_OBJ_TYPE_MEM: + ret = memobj_map(vmap_ctx, ¶ms->u.memobj, attrib); + break; + case VMAP_OBJ_TYPE_SYNC: + ret = syncobj_map(vmap_ctx, ¶ms->u.syncobj, attrib); + break; + case VMAP_OBJ_TYPE_IMPORT: + ret = importobj_map(vmap_ctx, ¶ms->u.importobj, attrib); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +int +vmap_obj_unmap(void *vmap_h, enum vmap_obj_type type, s32 obj_id) +{ + int ret = 0; + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)vmap_h; + + if (WARN_ON(!vmap_ctx)) + return -EINVAL; + + switch (type) { + case VMAP_OBJ_TYPE_MEM: + ret = memobj_unmap(vmap_ctx, obj_id); + break; 
+ case VMAP_OBJ_TYPE_SYNC: + ret = syncobj_unmap(vmap_ctx, obj_id); + break; + case VMAP_OBJ_TYPE_IMPORT: + ret = importobj_unmap(vmap_ctx, obj_id); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +int +vmap_obj_getref(void *vmap_h, enum vmap_obj_type type, s32 obj_id) +{ + int ret = 0; + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)vmap_h; + + if (WARN_ON(!vmap_ctx)) + return -EINVAL; + + switch (type) { + case VMAP_OBJ_TYPE_MEM: + ret = memobj_getref(vmap_ctx, obj_id); + break; + case VMAP_OBJ_TYPE_SYNC: + ret = syncobj_getref(vmap_ctx, obj_id); + break; + case VMAP_OBJ_TYPE_IMPORT: + ret = importobj_getref(vmap_ctx, obj_id); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +int +vmap_obj_putref(void *vmap_h, enum vmap_obj_type type, s32 obj_id) +{ + int ret = 0; + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)vmap_h; + + if (WARN_ON(!vmap_ctx)) + return -EINVAL; + + switch (type) { + case VMAP_OBJ_TYPE_MEM: + ret = memobj_putref(vmap_ctx, obj_id); + break; + case VMAP_OBJ_TYPE_SYNC: + ret = syncobj_putref(vmap_ctx, obj_id); + break; + case VMAP_OBJ_TYPE_IMPORT: + ret = importobj_putref(vmap_ctx, obj_id); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static void +vmap_importobj_unregister(void *data, void *ctx) +{ + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)ctx; + struct comm_msg *msg = (struct comm_msg *)data; + union descriptor_t desc; + + WARN_ON(!vmap_ctx); + WARN_ON(!msg); + WARN_ON(msg->type != COMM_MSG_TYPE_UNREGISTER); + + desc.value = msg->u.unreg.export_desc; + pr_debug("Unregister Desc: (%llu)\n", desc.value); + if (desc.bit.handle_type == STREAM_OBJ_TYPE_MEM) + vmap_obj_putref(vmap_ctx, VMAP_OBJ_TYPE_MEM, + desc.bit.handle_id); + else + vmap_obj_putref(vmap_ctx, VMAP_OBJ_TYPE_SYNC, + desc.bit.handle_id); +} + +static void +vmap_importobj_register(void *data, void *ctx) +{ + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)ctx; + struct comm_msg *msg = (struct comm_msg *)data; + struct importobj_map_ref *map = NULL; + s32 id_exist = 0; + + WARN_ON(!vmap_ctx); + WARN_ON(!msg); + WARN_ON(msg->type != COMM_MSG_TYPE_REGISTER); + + mutex_lock(&vmap_ctx->import_idr_lock); + + /* check if we have export descriptor from remote already ? 
*/ + id_exist = idr_for_each(&vmap_ctx->import_idr, match_export_desc, + &msg->u.reg.export_desc); + if (id_exist > 0) + map = idr_find(&vmap_ctx->import_idr, id_exist); + + if (map) { + if (msg->u.reg.iova != map->reg.attrib.iova) { + pr_err("attrib:iova doesn't match for export desc\n"); + goto err; + } + if (msg->u.reg.size != map->reg.attrib.size) { + pr_err("attrib:size doesn't match for export desc\n"); + goto err; + } + if (msg->u.reg.offsetof != map->reg.attrib.offsetof) { + pr_err("attrib:offsetof doesn't match for export desc\n"); + goto err; + } + map->reg.nr_export++; + kref_get(&map->refcount); + pr_debug("Registered descriptor again: (%llu)\n", + map->reg.export_desc); + } else { + /* map for the first time.*/ + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (WARN_ON(!map)) + goto err; + + map->vmap_ctx = vmap_ctx; + kref_init(&map->refcount); + map->reg.nr_export = 1; + map->reg.export_desc = msg->u.reg.export_desc; + map->reg.attrib.iova = msg->u.reg.iova; + map->reg.attrib.size = msg->u.reg.size; + map->reg.attrib.offsetof = msg->u.reg.offsetof; + map->obj_id = idr_alloc(&vmap_ctx->import_idr, map, + IMPORTOBJ_START, IMPORTOBJ_END, + GFP_KERNEL); + if (map->obj_id <= 0) { + pr_err("Failed to idr alloc for import obj\n"); + kfree(map); + goto err; + } + pr_debug("Registered descriptor: (%llu)\n", map->reg.export_desc); + } +err: + mutex_unlock(&vmap_ctx->import_idr_lock); +} + +/* Entry point for the virtual mapping sub-module/abstraction. */ +int +vmap_init(struct driver_ctx_t *drv_ctx, void **vmap_h) +{ + int ret = 0; + struct callback_ops cb_ops = {0}; + struct vmap_ctx_t *vmap_ctx = NULL; + + /* should not be an already instantiated vmap context. */ + if (WARN_ON(!drv_ctx || !vmap_h || *vmap_h)) + return -EINVAL; + + vmap_ctx = kzalloc(sizeof(*vmap_ctx), GFP_KERNEL); + if (WARN_ON(!vmap_ctx)) + return -ENOMEM; + + vmap_ctx->host1x_pdev = drv_ctx->drv_param.host1x_pdev; + vmap_ctx->comm_channel_h = drv_ctx->comm_channel_h; + vmap_ctx->pci_client_h = drv_ctx->pci_client_h; + idr_init(&vmap_ctx->mem_idr); + idr_init(&vmap_ctx->sync_idr); + idr_init(&vmap_ctx->import_idr); + mutex_init(&vmap_ctx->mem_idr_lock); + mutex_init(&vmap_ctx->sync_idr_lock); + mutex_init(&vmap_ctx->import_idr_lock); + + vmap_ctx->dummy_pdev = platform_device_alloc(drv_ctx->drv_name, -1); + if (!vmap_ctx->dummy_pdev) { + ret = -ENOMEM; + pr_err("Failed to allocate dummy platform device\n"); + goto err; + } + ret = platform_device_add(vmap_ctx->dummy_pdev); + if (ret) { + platform_device_put(vmap_ctx->dummy_pdev); + pr_err("Failed to add the dummy platform device\n"); + goto err; + } + ret = dma_set_mask(&vmap_ctx->dummy_pdev->dev, DMA_BIT_MASK(39)); + if (ret) { + platform_device_del(vmap_ctx->dummy_pdev); + platform_device_put(vmap_ctx->dummy_pdev); + pr_err("Failed to set mask for dummy platform device\n"); + goto err; + } + vmap_ctx->dummy_pdev_init = true; + + /* comm-channel for registering and unregistering import objects.*/ + cb_ops.callback = vmap_importobj_register; + cb_ops.ctx = (void *)vmap_ctx; + ret = comm_channel_register_msg_cb(vmap_ctx->comm_channel_h, + COMM_MSG_TYPE_REGISTER, &cb_ops); + if (ret) { + pr_err("Failed to add callback with for Register msg\n"); + goto err; + } + + cb_ops.callback = vmap_importobj_unregister; + cb_ops.ctx = (void *)vmap_ctx; + ret = comm_channel_register_msg_cb(vmap_ctx->comm_channel_h, + COMM_MSG_TYPE_UNREGISTER, &cb_ops); + if (ret) { + pr_err("Failed to add callback with for Unregister msg\n"); + goto err; + } + + *vmap_h = vmap_ctx; + return ret; 
+err: + vmap_deinit((void **)&vmap_ctx); + return ret; +} + +/* Exit point only. */ +static int +memobj_release(s32 obj_id, void *ptr, void *data) +{ + struct memobj_map_ref *map = (struct memobj_map_ref *)(ptr); + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)(data); + + /* release irrespective of reference counts. */ + if (map) { + memobj_unpin(vmap_ctx, &map->pin); + kfree(map); + } + + return 0; +} + +/* Exit point only. */ +static int +syncobj_release(s32 obj_id, void *ptr, void *data) +{ + struct syncobj_map_ref *map = (struct syncobj_map_ref *)(ptr); + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)(data); + + /* release irrespective of reference counts. */ + if (map) { + syncobj_unpin(vmap_ctx, &map->pin); + kfree(map); + } + + return 0; +} + +/* Exit point only. */ +static int +importobj_release(s32 obj_id, void *ptr, void *data) +{ + struct importobj_map_ref *map = (struct importobj_map_ref *)(ptr); + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)(data); + struct comm_msg msg = {0}; + + if (map) { + msg.type = COMM_MSG_TYPE_UNREGISTER; + msg.u.unreg.export_desc = map->reg.export_desc; + msg.u.unreg.iova = map->reg.attrib.iova; + msg.u.unreg.size = map->reg.attrib.size; + msg.u.unreg.offsetof = map->reg.attrib.offsetof; + comm_channel_msg_send(vmap_ctx->comm_channel_h, &msg); + + kfree(map); + } + + return 0; +} + +/* Exit point for nvscic2c-pcie vmap sub-module/abstraction. */ +void +vmap_deinit(void **vmap_h) +{ + struct vmap_ctx_t *vmap_ctx = (struct vmap_ctx_t *)(*vmap_h); + + if (!vmap_ctx) + return; + + comm_channel_unregister_msg_cb(vmap_ctx->comm_channel_h, + COMM_MSG_TYPE_REGISTER); + comm_channel_unregister_msg_cb(vmap_ctx->comm_channel_h, + COMM_MSG_TYPE_UNREGISTER); + /* + * free all the allocations still idr allocated IDR. + * + * ideally, this should not be the case, however in scenario: + * application went away and the remote missed to free the imported + * target handle, then during module unload (PCIe link shall be down) + * we shall free all the pinned + yet to be unpinned handles. + */ + mutex_lock(&vmap_ctx->mem_idr_lock); + idr_for_each(&vmap_ctx->mem_idr, memobj_release, vmap_ctx); + idr_destroy(&vmap_ctx->mem_idr); + mutex_unlock(&vmap_ctx->mem_idr_lock); + + mutex_lock(&vmap_ctx->sync_idr_lock); + idr_for_each(&vmap_ctx->sync_idr, syncobj_release, vmap_ctx); + idr_destroy(&vmap_ctx->sync_idr); + mutex_unlock(&vmap_ctx->sync_idr_lock); + + mutex_lock(&vmap_ctx->import_idr_lock); + idr_for_each(&vmap_ctx->import_idr, importobj_release, vmap_ctx); + idr_destroy(&vmap_ctx->import_idr); + mutex_unlock(&vmap_ctx->import_idr_lock); + + if (vmap_ctx->dummy_pdev_init) { + platform_device_unregister(vmap_ctx->dummy_pdev); + vmap_ctx->dummy_pdev_init = false; + } + + kfree(vmap_ctx); + *vmap_h = NULL; +} diff --git a/drivers/misc/nvscic2c-pcie/vmap.h b/drivers/misc/nvscic2c-pcie/vmap.h new file mode 100644 index 00000000..5800abff --- /dev/null +++ b/drivers/misc/nvscic2c-pcie/vmap.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + */ + +#ifndef __VMAP_H__ +#define __VMAP_H__ + +#include "common.h" + +/* + * Virtual mapping abstraction offers pinning functionalities for the very + * the specific use-case(s) to achieve NvStreams streaming over NvSciC2cPcie + * on Tegra PCIe EP or Tegra PCIe RP. It's scope is specific to/for and limited + * to NvSciC2cPcie. + * + * Virtual mapping interfaces primarily offer mapping(also referred as pinning + * in tegra context) and unmapping(also referred as unpinning in tegra context) + * dma_buf backed NvRmMemHandles or Syncpoint Shim backed NvRmHost1xSyncpoints. + * In this abstraction, Memory/Mem objects/objs are NvRmMemHandles(or it's FD), + * and Sync objects/objs are NvRmHost1xSyncpointHandles (or it's FD). + * + * This SW shall run either on Tegra PCIe RP or Tegra PCIe EP. + * + * ASSUMPTION: Once pages are mapped/pinned, on tegra they shall not swap out. + * + * On tegra, Mem objects are the NvRmMemHandles with struct dma_buf backing. + * On tegra, Sync objects are the NvRmHost1xSyncpointHandles with Syncpoint + * shim (aperture) backing. + * + * Each object is considered one of the following: + * + * 1. Local - The object visibility is limited to local SoC on which it is + * allocated. Also sometimes referred as Unexported. These objects are mapped + * to PCIe device address space and it's whereabouts are NOT shared with + * remote SoC. + * 2. Export/Exported - The object visibility is across PCIe to remote SoC + * and subsequently remote SoC can initiate writes to it. For tegra, + * exported objects are never read over PCIe. These objects are mapped to + * PCIe device address space and it's whereabouts shall be shared with remote + * SoC. + * 3. Import/Imported - This is a virtual object which points to the + * corresponding object exported by remote SoC. Being virtual, it shall be + * similar for both Mem and Sync objects - therefore Imported object is + * just an import/imported object and NOT imported Mem/Sync obj. + * + * Protection/Permission flags: + * a. Local Mem objects map to PCIe device with READ access. + * b. Export Mem objects map to PCIe device with WRITE access (We export for + * remote to write to it via CPU or PCIe eDMA(zero-copy). + * c. Local Sync objects are not mapped to PCIe device as NvSciC2cPcie KMD + * shall signal(write) them via NvHost1x interface. + * d. Export Sync objects map to PCIe device with WRITE access (We export for + * remote to signal(write) to it via CPU or PCIe eDMA). + * + * Mapping: + * Tegra PCIe EP exposes three BAR memory windows towards PCIe RP. Of these, + * Only one (1) (BAR0) is available for NvSciC2cPcie access. Therefore, all + * Export objects must be mapped to a PCIe address which this PCIe EP BAR0 + * translation is programmed with. With the overall PCIe address space being + * much bigger than the PCIe BAR0 space, there is an explicit need to stitch + * all Exported objects to a single region. This requires Export objects be + * mapped using iommu apis to achieve BAR stitching and this mapping is + * referred as client managed with NvSciC2cPcie managing the iova region. + * + * Tegra PCIe EP has limited set of translation registers for it's CPU to raise + * PCIe transactions towards a PCIe RP. Therefore, when Sync objects are + * exported from PCIe RP towards PCIe EP to CPU signal them, they must be mapped + * to a single iova region which PCIe EP has setup for it's translaction + * registers. 
This is strictly not applicable for Exported Mem objects as they + * are written by eDMA always by the importing SoC. However, to keep parity and + * symmetry in the behavior of Exported Mem objects from Tegra PCIe EE->PCIe RP, + * Exported Mem objects from Tegra PCIe RP->Tegra PCIe EP shall also be mapped + * to a client managed iova region. + * + * For Local Mem objects which are accessed by local SoC PCIe eDMA, they + * can be mapped to any pcie address outside the reserved iova region by + * NvSciC2cPcie for exports. This doesn't require any iova management by + * client and is prefectly fine to use pci device (smmu) managed iova. + * This mapping is referred as device(dev) managed mapping. Only on Tegra PCIe + * RP, Exported Mem objects can be mapped using dev managed iova as Tegra PCIe + * EP shall write them using eDMA, but as stated before, to keep parity and + * symmetry in the behavior of Exported Mem objects from Tegra PCIe EE->PCIe RP, + * Exported Mem objects from Tegra PCIe RP->Tegra PCIe EP shall also be mapped + * to a client managed iova region. + * + * All Sync objects (local or export) are mapped to NvSciC2cPcie for signalling + * (Write access), based on QNX security policy, there shall be only one + * signaler allowed and, therefore, Sync objects are pinned/mapped just once. + * Export Mem objects are mapped to NvSciC2cPcie for remote SoC to produce data + * (Write access), also in the lack of N producer -> 1 consumer user-case, + * remote Mem objects are pinned/mapped just once. However, Local Mem objects + * which have Read access, can be mapped/pinned again. Essentially, all objects + * requiring Write access by NvSciC2cPcie (PCIe device) are pinned/mapped once. + * + * Summary: + * i. Types: + * a. Local Mem objects. + * b. Export Mem objects. + * c. Local Sync objects. + * d. Export Sync objects. + * e. Import objects. + * ii. Mapping: + * a. Local Mem objects - dev managed (READ only). + * b. Export Mem objects - client managed (WRITE only). + * On Tegra PCIe EP:- compulsarily client managed. + * On Tegra PCIe RP:- Can be either dev managed or client managed. + * Choose client manged to be in symmetry with Tegra PCIe EP. + * c. Local Sync objects - Not mapped but pinned (tracked). + * d. Export Sync objects - client managed (WRITE ONLY). + * On Tegra PCIe EP:- compulsarily client managed. + * On Tegra PCIe RP:- Can be either dev managed (if Tegra PCIe RP use + * only eDMA for signaling, but we have use-cases for CPU signaling + * also from Tegra PCIe RP. Therefore choose client managed as it can + * satisfy CPU and eDMA signaling needs from remote Tegra PCIe EP. + * e. Import Objects - virtual objects pointing to exported objects by + * remote. Every exported object from a SoC must have a corresponding + * import object on remote SoC. Not mapped but pinned(tracked). + */ + +/* forward declaration. */ +struct driver_ctx_t; + +/* Object type that shall be virtually mapped for PCIe access. */ +enum vmap_obj_type { + /* NvRmMemHandle (struct dma_buf *), aka, memobj.*/ + VMAP_OBJ_TYPE_MEM = STREAM_OBJ_TYPE_MEM, + + /* NvRmHost1xSyncpointHandle ( syncpt id), aka, memobj.*/ + VMAP_OBJ_TYPE_SYNC = STREAM_OBJ_TYPE_SYNC, + + /*(virtual) objects imported from remote SoC.*/ + VMAP_OBJ_TYPE_IMPORT, +}; + +/* + * Permissions for pin/mapping Buff/Sync objs to PCIe device + * + * at the moment, WRITE for all EXPORT* + * at the moment, READ for all LOCAL* + */ +enum vmap_obj_prot { + /* read-only access by PCIe device. 
*/ + VMAP_OBJ_PROT_READ = 1, + + /* write only access to PCIe device. */ + VMAP_OBJ_PROT_WRITE = 2, + + /* + * no use-case known today. + * VMAP_OBJ_PERM_READWRITE = 4, + */ +}; + +/* Which IOVA to use for mapping Mem/Sync objs.*/ +enum vmap_mngd { + /* + * Stitching of all exported objects is done by reserving an IOVA region + * and mapping Mem and Sync objs to it. The reserved IOVA region is + * managed by client/user (NvSciC2cPcie) and use iommu apis to map Mem + * or Sync objects to the specific IOVA. + */ + VMAP_MNGD_CLIENT = 0, + + /* + * The IOVA is managed by pci dev, therefore dev managed. Used only for + * Mem objects (Local and possible for exported too). + */ + VMAP_MNGD_DEV, +}; + +/* Returned object attributes after mapping.*/ +struct vmap_obj_attributes { + enum vmap_obj_type type; + s32 id; + u64 iova; + size_t size; + size_t offsetof; + /* only for local sync obj.*/ + u32 syncpt_id; +}; + +/* Parameters to map Mem object.*/ +struct vmap_memobj_map_params { + s32 fd; + /* To allow mapping Export Mem objects as dev-managed - Tegra PCIe RP.*/ + enum vmap_mngd mngd; + /* local/source mem as read-only, remote/export as write-only.*/ + enum vmap_obj_prot prot; +}; + +/* Parameters to map Sync object.*/ +struct vmap_syncobj_map_params { + s32 fd; + u32 id; + /* client mngd only.*/ + enum vmap_mngd mngd; + /* write-only.*/ + enum vmap_obj_prot prot; + /* local sync objs will not be pinned to pcie address space.*/ + bool pin_reqd; +}; + +/* Parameters to map Import object.*/ +struct vmap_importobj_map_params { + u64 export_desc; +}; + +struct vmap_obj_map_params { + enum vmap_obj_type type; + union vmap_params { + struct vmap_memobj_map_params memobj; + struct vmap_syncobj_map_params syncobj; + struct vmap_importobj_map_params importobj; + } u; +}; + +/* Parameters to register Import object, as received from remote NvSciC2cPcie.*/ +struct vmap_importobj_reg_params { + u64 export_desc; + u64 iova; + size_t size; + size_t offsetof; +}; + +/* Entry point for the virtual mapping sub-module/abstraction. */ +int +vmap_init(struct driver_ctx_t *drv_ctx, void **vmap_h); + +/* Exit point for nvscic2c-pcie vmap sub-module/abstraction. */ +void +vmap_deinit(void **vmap_h); + +/* Map objects to pcie device.*/ +int +vmap_obj_map(void *vmap_h, struct vmap_obj_map_params *params, + struct vmap_obj_attributes *attrib); + +/* Unmap objects from pcie device.*/ +int +vmap_obj_unmap(void *vmap_h, enum vmap_obj_type type, s32 obj_id); + +/* Increment reference count for objects. */ +int +vmap_obj_getref(void *vmap_h, enum vmap_obj_type type, s32 obj_id); + +/* Decrement reference count for objects. */ +int +vmap_obj_putref(void *vmap_h, enum vmap_obj_type type, s32 obj_id); +#endif // __VMAP_H__ diff --git a/include/uapi/misc/nvscic2c-pcie-ioctl.h b/include/uapi/misc/nvscic2c-pcie-ioctl.h new file mode 100644 index 00000000..adb81d1a --- /dev/null +++ b/include/uapi/misc/nvscic2c-pcie-ioctl.h @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef __UAPI_NVSCIC2C_PCIE_IOCTL_H__ +#define __UAPI_NVSCIC2C_PCIE_IOCTL_H__ + +#include +#include + +#if !defined(__KERNEL__) +#define __user +#endif + +#define MAX_NAME_SZ (32) + +/* Link status between the two peers - encapsulates PCIe link also.*/ +enum nvscic2c_pcie_link { + NVSCIC2C_PCIE_LINK_DOWN = 0, + NVSCIC2C_PCIE_LINK_UP, +}; + +/** + * stream extensions - object type. + */ +enum nvscic2c_pcie_obj_type { + NVSCIC2C_PCIE_OBJ_TYPE_INVALID = 0, + + /* local NvRmMemHandle(tegra) /NvRmHandle(x86) obj. */ + NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM, + + /* Exported NvRmMemHandle(tegra) /NvRmHandle(x86) obj. */ + NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM, + + /* local NvRmHost1xSyncpoint(tegra) /GPU Semaphore(x86) obj. */ + NVSCIC2C_PCIE_OBJ_TYPE_LOCAL_SYNC, + + /* Exported NvRmHost1xSyncpoint(tegra) /GPU Semaphore(x86) obj. */ + NVSCIC2C_PCIE_OBJ_TYPE_REMOTE_SYNC, + + /* (virtual) objects imported from remote SoC. */ + NVSCIC2C_PCIE_OBJ_TYPE_IMPORT, + + NVSCIC2C_PCIE_OBJ_TYPE_MAXIMUM, +}; + +/** + * PCIe aperture and PCIe shared memory + * are divided in different C2C endpoints. + * Data structure represents endpoint's + * physical address and size. + */ +struct nvscic2c_pcie_endpoint_mem_info { + /* would be one of the enum nvscic2c_mem_type.*/ + __u32 offset; + + /* size of this memory type device would like user-space to map.*/ + __u32 size; +}; + +/** + * NvSciIpc endpoint information relayed to UMD. This information + * is per endpoint which shall allow UMD to mmap the endpoint's + * send, recv and pcie link area in user-space. + */ +struct nvscic2c_pcie_endpoint_info { + __u32 nframes; + __u32 frame_size; + struct nvscic2c_pcie_endpoint_mem_info peer; + struct nvscic2c_pcie_endpoint_mem_info self; + struct nvscic2c_pcie_endpoint_mem_info link; +}; + +/** + * stream extensions - Pin/Map. + */ +struct nvscic2c_pcie_map_in_arg { + /* + * Mem obj - NvRmMemHandle FD. Sync obj - NvRmHost1xSyncpointHandle FD. + */ + __s32 fd; + /* + * Mem obj - padding. Sync obj - Syncpoint Id. + */ + __u32 id; +}; + +struct nvscic2c_pcie_map_out_arg { + __s32 handle; + __u32 pad; +}; + +struct nvscic2c_pcie_map_obj_args { + __s32 obj_type; + __u32 pad; + struct nvscic2c_pcie_map_in_arg in; + struct nvscic2c_pcie_map_out_arg out; +}; + +/** + * stream extensions - Export. + */ +struct nvscic2c_pcie_export_in_arg { + __s32 handle; + __u32 pad; +}; + +struct nvscic2c_pcie_export_out_arg { + __u64 desc; +}; + +struct nvscic2c_pcie_export_obj_args { + __s32 obj_type; + __u32 pad; + struct nvscic2c_pcie_export_in_arg in; + struct nvscic2c_pcie_export_out_arg out; +}; + +/** + * stream extensions - Import. + */ +struct nvscic2c_pcie_import_in_arg { + __u64 desc; +}; + +struct nvscic2c_pcie_import_out_arg { + __s32 handle; + __u32 pad; +}; + +struct nvscic2c_pcie_import_obj_args { + __s32 obj_type; + __u32 pad; + struct nvscic2c_pcie_import_in_arg in; + struct nvscic2c_pcie_import_out_arg out; +}; + +/** + * stream extensions - Free Pinned Or Imported objects. + */ +struct nvscic2c_pcie_free_obj_args { + __s32 obj_type; + __s32 handle; +}; + +/** + * stream extensions - one transfer/copy unit. 
+ */ +struct nvscic2c_pcie_flush_range { + __s32 src_handle; + __s32 dst_handle; + __u64 offset; + __u64 size; +}; + +/* + * @local_post_fences: user memory atleast of size: + * num_local_post_fences * sizeof(__s32) - local sync handles + * + * @remote_post_fences: user memory atleast of size: + * num_remote_post_fences * sizeof(__s32) - import sync handles + * + * @copy_requests: user memory atleast of size: + * num_flush_ranges * sizeof(struct nvscic2c_pcie_flush_range) + */ +struct nvscic2c_pcie_submit_copy_args { + __u64 num_local_post_fences; + __u64 local_post_fences; + __u64 num_remote_post_fences; + __u64 remote_post_fences; + __u64 num_flush_ranges; + __u64 flush_ranges; + __u64 remote_post_fence_values; +}; + +/** + * stream extensions - Pass upper limit for the total possible outstanding + * submit copy requests. + * @max_copy_requests: Maximum outstanding @nvscic2c_pcie_submit_copy_args. + * @max_flush_ranges: Maximum @nvscic2c_pcie_flush_range possible for each + * of the @max_copy_requests (@nvscic2c_pcie_submit_copy_args) + * @max_post_fences: Maximum post-fences possible for each of the + * @max_copy_requests (@nvscic2c_pcie_submit_copy_args) + */ +struct nvscic2c_pcie_max_copy_args { + __u64 max_copy_requests; + __u64 max_flush_ranges; + __u64 max_post_fences; +}; + +/* Only to facilitate calculation of maximum size of ioctl arguments.*/ +union nvscic2c_pcie_ioctl_arg_max_size { + struct nvscic2c_pcie_max_copy_args mc; + struct nvscic2c_pcie_submit_copy_args cr; + struct nvscic2c_pcie_free_obj_args fo; + struct nvscic2c_pcie_import_obj_args io; + struct nvscic2c_pcie_export_obj_args eo; + struct nvscic2c_pcie_map_obj_args mp; + struct nvscic2c_pcie_endpoint_info ep; +}; + +/* IOCTL magic number - seen available in ioctl-number.txt*/ +#define NVSCIC2C_PCIE_IOCTL_MAGIC 0xC2 + +#define NVSCIC2C_PCIE_IOCTL_GET_INFO \ + _IOWR(NVSCIC2C_PCIE_IOCTL_MAGIC, 1,\ + struct nvscic2c_pcie_endpoint_info) + +/** + * notify remote + */ +#define NVSCIC2C_PCIE_IOCTL_NOTIFY_REMOTE \ + _IO(NVSCIC2C_PCIE_IOCTL_MAGIC, 2) + +/** + * Pin/Map Mem or Sync objects. + */ +#define NVSCIC2C_PCIE_IOCTL_MAP \ + _IOWR(NVSCIC2C_PCIE_IOCTL_MAGIC, 3,\ + struct nvscic2c_pcie_map_obj_args) + +/** + * Get Export descriptor for Target/Remote Mem/Sync objects. + */ +#define NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN \ + _IOWR(NVSCIC2C_PCIE_IOCTL_MAGIC, 4,\ + struct nvscic2c_pcie_export_obj_args) + +/** + * Get Handle from the imported export descriptor. + */ +#define NVSCIC2C_PCIE_IOCTL_GET_HANDLE \ + _IOWR(NVSCIC2C_PCIE_IOCTL_MAGIC, 5,\ + struct nvscic2c_pcie_import_obj_args) + +/** + * Free the Mapped/Pinned Source, Target or Imported Mem or Sync object handle. + */ +#define NVSCIC2C_PCIE_IOCTL_FREE \ + _IOW(NVSCIC2C_PCIE_IOCTL_MAGIC, 6,\ + struct nvscic2c_pcie_free_obj_args) + +/** + * Submit a Copy request for transfer. + */ +#define NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST \ + _IOW(NVSCIC2C_PCIE_IOCTL_MAGIC, 7,\ + struct nvscic2c_pcie_submit_copy_args) + +/** + * Set the maximum possible outstanding copy requests that can be submitted. + */ +#define NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS \ + _IOW(NVSCIC2C_PCIE_IOCTL_MAGIC, 8,\ + struct nvscic2c_pcie_max_copy_args) + +#define NVSCIC2C_PCIE_IOCTL_NUMBER_MAX 8 + +#endif /*__UAPI_NVSCIC2C_PCIE_IOCTL_H__*/
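Editorial note: the vmap interface declared in vmap.h is easiest to follow with a concrete caller. The sketch below is illustrative only and not part of the patch; the function name and the fd parameter are hypothetical, and it assumes vmap.h is included. It shows how an in-kernel user such as the stream-extensions layer might map an exported Mem object as client-managed, write-only, matching the mapping rules summarised in the vmap.h header comment.

/* Illustrative sketch: map an exported (remote-writable) Mem object. */
static int example_map_export_memobj(void *vmap_h, s32 memfd, s32 *obj_id)
{
	struct vmap_obj_map_params params = {0};
	struct vmap_obj_attributes attrib = {0};
	int ret;

	params.type = VMAP_OBJ_TYPE_MEM;
	params.u.memobj.fd = memfd;			/* NvRmMemHandle FD. */
	params.u.memobj.prot = VMAP_OBJ_PROT_WRITE;	/* export objs are write-only for PCIe. */
	params.u.memobj.mngd = VMAP_MNGD_CLIENT;	/* stitch into the reserved iova region. */

	ret = vmap_obj_map(vmap_h, &params, &attrib);
	if (ret)
		return ret;

	/* attrib.iova + attrib.offsetof locate the object within the export region. */
	*obj_id = attrib.id;
	return 0;
}

The object is later released with vmap_obj_unmap(vmap_h, VMAP_OBJ_TYPE_MEM, obj_id); mapping the same dma_buf again with the same management mode only takes an additional reference, as memobj_map() deduplicates via idr_for_each() and kref_get().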
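For import objects, importobj_map() returns -EAGAIN until the peer's COMM_MSG_TYPE_REGISTER message for that export descriptor has been handled by vmap_importobj_register(), so callers are expected to retry. A minimal sketch follows, assuming vmap.h and <linux/delay.h> are included; the retry budget and delay are arbitrary choices for illustration, not values taken from the driver.

/* Illustrative sketch: resolve an export descriptor received from the remote SoC. */
static int example_import_desc(void *vmap_h, u64 export_desc, s32 *obj_id)
{
	struct vmap_obj_map_params params = {0};
	struct vmap_obj_attributes attrib = {0};
	int i, ret = -EAGAIN;

	params.type = VMAP_OBJ_TYPE_IMPORT;
	params.u.importobj.export_desc = export_desc;

	for (i = 0; i < 10 && ret == -EAGAIN; i++) {
		ret = vmap_obj_map(vmap_h, &params, &attrib);
		if (ret == -EAGAIN)
			usleep_range(1000, 2000);	/* remote REGISTER not processed yet. */
	}
	if (!ret)
		*obj_id = attrib.id;	/* attrib mirrors the exporter's iova/size/offsetof. */
	return ret;
}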
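At the UAPI level, the ioctls in nvscic2c-pcie-ioctl.h compose in a similar way. Below is a hedged user-space sketch of the exporting side; the endpoint device fd is assumed to be opened elsewhere (the per-endpoint node name is defined by the driver/DT configuration, not by this header), the include path assumes installed UAPI headers, and the mechanism used to hand the descriptor to the peer SoC is outside this driver.

#include <stdint.h>
#include <sys/ioctl.h>
#include <misc/nvscic2c-pcie-ioctl.h>	/* installed copy of the UAPI header above. */

/*
 * Illustrative sketch: on the exporting SoC, pin a target Mem object on an
 * already-open nvscic2c-pcie endpoint fd and fetch its export descriptor.
 */
int example_export_target_mem(int c2c_fd, int memfd, uint64_t *desc_out)
{
	struct nvscic2c_pcie_map_obj_args map = {0};
	struct nvscic2c_pcie_export_obj_args exp = {0};

	map.obj_type = NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM;
	map.in.fd = memfd;			/* NvRmMemHandle FD. */
	if (ioctl(c2c_fd, NVSCIC2C_PCIE_IOCTL_MAP, &map))
		return -1;

	exp.obj_type = NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM;
	exp.in.handle = map.out.handle;
	if (ioctl(c2c_fd, NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN, &exp)) {
		struct nvscic2c_pcie_free_obj_args free_args = {
			.obj_type = NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM,
			.handle = map.out.handle,
		};

		ioctl(c2c_fd, NVSCIC2C_PCIE_IOCTL_FREE, &free_args);
		return -1;
	}

	*desc_out = exp.out.desc;		/* relay to the peer SoC. */
	return 0;
}

On the importing side, the peer converts the received descriptor into a handle with NVSCIC2C_PCIE_IOCTL_GET_HANDLE (obj_type NVSCIC2C_PCIE_OBJ_TYPE_IMPORT) and can then reference that handle as dst_handle in the nvscic2c_pcie_flush_range entries of an NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST, after sizing the request pools once via NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS.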