// SPDX-License-Identifier: GPL-2.0-only
// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

#define pr_fmt(fmt)	"nvscic2c-pcie: comm-channel: " fmt

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/host1x-next.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of.h>
#include <linux/pci-epc.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/workqueue.h>

#include "comm-channel.h"
#include "common.h"
#include "module.h"
#include "pci-client.h"

#define CACHE_ALIGN		(64)

/* Fifo size. */
/*
 * This is not ideal, but to have the private communication channel functional
 * at the earliest, we allocate a large set of frames assuming all the
 * available endpoints can share all possible export descriptors without
 * having to block and wait for the channel to become writeable.
 *
 * Despite this huge fifo size, if a msg cannot be sent, it either means the
 * remote is processing them very slowly (unlikely) or is ill. In such a case,
 * we return -EAGAIN for the application to retry; the application can bail
 * out after a few retries.
 */
#define COMM_CHANNEL_NFRAMES	(1024)
#define COMM_CHANNEL_FRAME_SZ	(64)

/* fifo header. */
struct header {
	u32 wr_count;
	u32 rd_count;
	u8 pad[CACHE_ALIGN - sizeof(u32) - sizeof(u32)];
} __packed;

/* kthread. */
struct task_t {
	struct task_struct *task;
	wait_queue_head_t waitq;
	struct completion shutdown_compl;
	bool shutdown;
	bool created;
};

/* Notification handling. */
struct syncpt_t {
	u32 id;
	u32 threshold;
	struct host1x_syncpt *sp;

	/* PCIe aperture for writes to peer syncpoint for the same comm-channel. */
	struct pci_aper_t peer_mem;

	/* syncpoint physical address for stitching to PCIe BAR backing. */
	size_t size;
	phys_addr_t phy_addr;

	/* iova mapping of client choice. */
	void *iova_block_h;
	u64 iova;
	bool mapped_iova;

	/* Lock to protect fences between callback and deinit. */
	struct mutex lock;
	/* Fence for a specific threshold. */
	struct dma_fence *fence;
	struct dma_fence_cb fence_cb;
	/* Work to notify and allocate a new fence. */
	struct work_struct work;

	void (*notifier)(void *data);
	void *notifier_data;
	bool fence_release;
};

struct fifo_t {
	/* slots/frames for the comm-channel. */
	u32 nframes;
	u32 frame_sz;

	/* fifo operations. */
	struct header *send_hdr;
	struct header *recv_hdr;
	struct header *local_hdr;
	u8 *send;
	u8 *recv;
	u32 wr_pos;
	u32 rd_pos;

	/* serialize send operations. */
	struct mutex send_lock;

	/* fifo physical pages, stitched to iova of client choice (recv). */
	struct cpu_buff_t self_mem;
	void *iova_block_h;
	u64 iova;
	bool mapped_iova;

	/* PCIe aperture for writes to peer comm fifo. */
	struct pci_aper_t peer_mem;
};

struct comm_channel_ctx_t {
	/* data. */
	struct fifo_t fifo;

	/* Notification. */
	struct syncpt_t syncpt;

	/* receive message task. */
	struct task_t r_task;
	atomic_t recv_count;

	/* Callbacks registered for recv messages. */
	struct mutex cb_ops_lock;
	struct callback_ops cb_ops[COMM_MSG_TYPE_MAXIMUM];

	/* pci client handle. */
	void *pci_client_h;

	/* nvscic2c-pcie DT node reference, used in getting the syncpoint shim. */
	struct device_node *of_node;

	struct platform_device *host1x_pdev;
};
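
/*
 * FIFO accounting: each side publishes monotonically increasing frame
 * counters rather than ring indices. local_hdr is this side's private
 * copy of its own wr_count/rd_count, recv_hdr sits in local memory and
 * is written by the peer over PCIe, and send_hdr points into the peer's
 * aperture so that our updates land in the peer's recv_hdr.
 *
 * The u32 difference of two counters gives the frames in flight and is
 * well defined across wrap-around: e.g. local wr_count = 7 with peer
 * rd_count = 5 means 2 frames are outstanding. The fifo is full when the
 * difference equals nframes; anything larger indicates corruption and is
 * reported as -EOVERFLOW by the helpers below.
 */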
static inline bool
can_send(struct fifo_t *fifo, int *ret)
{
	bool send = false;
	u32 peer_toread =
		(fifo->local_hdr->wr_count - fifo->recv_hdr->rd_count);

	if (peer_toread < fifo->nframes) {
		/* space available - can send. */
		send = true;
		*ret = 0;
	} else if (peer_toread == fifo->nframes) {
		/* full. client can try again (at the moment). */
		send = false;
		*ret = -EAGAIN;
	} else if (peer_toread > fifo->nframes) {
		/* erroneous. */
		send = false;
		*ret = -EOVERFLOW;
	}

	return send;
}

static inline bool
can_recv(struct fifo_t *fifo, int *ret)
{
	bool recv = false;
	u32 toread =
		(fifo->recv_hdr->wr_count - fifo->local_hdr->rd_count);

	if (toread == 0) {
		/* no frame available to read. */
		recv = false;
		*ret = -ENODATA;
	} else if (toread <= fifo->nframes) {
		/* frames available - can read. */
		recv = true;
		*ret = 0;
	} else if (toread > fifo->nframes) {
		/* erroneous. */
		recv = false;
		*ret = -EOVERFLOW;
	}

	return recv;
}

static int
send_msg(struct comm_channel_ctx_t *comm_ctx, struct comm_msg *msg)
{
	int ret = 0;
	size_t size = 0;
	void *from = NULL;
	void __iomem *to = NULL;
	struct fifo_t *fifo = NULL;
	struct syncpt_t *syncpt = NULL;
	enum peer_cpu_t peer_cpu = NVCPU_ORIN;

	fifo = &comm_ctx->fifo;
	syncpt = &comm_ctx->syncpt;
	peer_cpu = pci_client_get_peer_cpu(comm_ctx->pci_client_h);

	mutex_lock(&fifo->send_lock);

	/* if no space is available at the moment, the client can try again. */
	if (!can_send(fifo, &ret)) {
		mutex_unlock(&fifo->send_lock);
		return ret;
	}

	to = (void __iomem *)(fifo->send + (fifo->wr_pos * fifo->frame_sz));
	from = (void *)(msg);
	size = sizeof(*msg);
	memcpy_toio(to, from, size);
	fifo->local_hdr->wr_count++;
	writel(fifo->local_hdr->wr_count,
	       (void __iomem *)(&fifo->send_hdr->wr_count));
	if (peer_cpu == NVCPU_X86_64) {
		/* the comm-channel irq vector is always taken from index 0. */
		ret = pci_client_raise_irq(comm_ctx->pci_client_h,
					   PCI_EPC_IRQ_MSI, 0);
	} else {
		/* notify peer for each write. */
		writel(0x1, syncpt->peer_mem.pva);
	}

	fifo->wr_pos = fifo->wr_pos + 1;
	if (fifo->wr_pos >= fifo->nframes)
		fifo->wr_pos = 0;

	mutex_unlock(&fifo->send_lock);

	return ret;
}

int
comm_channel_ctrl_msg_send(void *comm_channel_h, struct comm_msg *msg)
{
	struct comm_channel_ctx_t *comm_ctx =
		(struct comm_channel_ctx_t *)comm_channel_h;

	if (WARN_ON(!comm_ctx || !msg))
		return -EINVAL;

	if (WARN_ON(msg->type != COMM_MSG_TYPE_BOOTSTRAP &&
		    msg->type != COMM_MSG_TYPE_EDMA_RX_DESC_IOVA_RETURN &&
		    msg->type != COMM_MSG_TYPE_SHUTDOWN &&
		    msg->type != COMM_MSG_TYPE_LINK))
		return -EINVAL;

	/*
	 * this is a special ctrl message where the sender: @DRV_MODE_EPC
	 * or @DRV_MODE_EPF shares control information with the peer. We do
	 * not check for nvscic2c-pcie link status, as these messages can
	 * flow even when the nvscic2c-pcie link status is down but the
	 * physical PCIe link between the PCIe RP SoC and the PCIe EP SoC
	 * is possibly still alive.
	 */
	return send_msg(comm_ctx, msg);
}

int
comm_channel_msg_send(void *comm_channel_h, struct comm_msg *msg)
{
	enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN;
	struct comm_channel_ctx_t *comm_ctx =
		(struct comm_channel_ctx_t *)comm_channel_h;

	if (WARN_ON(!comm_ctx || !msg))
		return -EINVAL;

	if (WARN_ON(msg->type != COMM_MSG_TYPE_REGISTER &&
		    msg->type != COMM_MSG_TYPE_UNREGISTER))
		return -EINVAL;

	link = pci_client_query_link_status(comm_ctx->pci_client_h);
	if (link != NVSCIC2C_PCIE_LINK_UP)
		return -ENOLINK;

	return send_msg(comm_ctx, msg);
}
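
/*
 * Receive path: syncpt_callback() (armed further below) bumps recv_count
 * and wakes the waitq whenever the remote rings our notification
 * syncpoint. Each wakeup drains all readable frames, dispatches each one
 * to the callback registered for its message type under cb_ops_lock, and
 * then publishes the updated rd_count into the peer's header over PCIe.
 * No notification is sent back for freed space; a full fifo is simply
 * reported to senders as -EAGAIN.
 */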
static int
recv_taskfn(void *arg)
{
	int ret = 0;
	struct comm_channel_ctx_t *comm_ctx = NULL;
	struct comm_msg *msg = NULL;
	struct task_t *task = NULL;
	struct fifo_t *fifo = NULL;
	struct callback_ops *cb_ops = NULL;

	comm_ctx = (struct comm_channel_ctx_t *)(arg);
	task = &comm_ctx->r_task;
	fifo = &comm_ctx->fifo;

	while (!task->shutdown) {
		/* wait for notification from peer or shutdown. */
		wait_event_interruptible(task->waitq,
					 (atomic_read(&comm_ctx->recv_count) ||
					  task->shutdown));

		/* task is exiting. */
		if (task->shutdown)
			continue;

		/* read all on a single notify. */
		atomic_dec(&comm_ctx->recv_count);
		while (can_recv(fifo, &ret)) {
			msg = (struct comm_msg *)
				(fifo->recv + (fifo->rd_pos * fifo->frame_sz));
			if (msg->type > COMM_MSG_TYPE_INVALID &&
			    msg->type < COMM_MSG_TYPE_MAXIMUM) {
				mutex_lock(&comm_ctx->cb_ops_lock);
				cb_ops = &comm_ctx->cb_ops[msg->type];
				if (cb_ops->callback)
					cb_ops->callback((void *)msg,
							 cb_ops->ctx);
				mutex_unlock(&comm_ctx->cb_ops_lock);
			}
			fifo->local_hdr->rd_count++;
			writel(fifo->local_hdr->rd_count,
			       (void __iomem *)(&fifo->send_hdr->rd_count));

			/* do not notify peer of space availability. */
			fifo->rd_pos = fifo->rd_pos + 1;
			if (fifo->rd_pos >= fifo->nframes)
				fifo->rd_pos = 0;
		}

		/* if nothing (left) to read, go back to waiting. */
		continue;
	}

	/* we do not use kthread_stop(), but wait on this. */
	complete(&task->shutdown_compl);

	return 0;
}

/* Shall be invoked on expiry of the syncpoint shim fence/trigger from remote. */
static void
syncpt_callback(void *data)
{
	struct comm_channel_ctx_t *comm_ctx = NULL;

	if (WARN_ON(!data))
		return;

	comm_ctx = (struct comm_channel_ctx_t *)(data);

	/* kick r_task for processing this notification. */
	atomic_inc(&comm_ctx->recv_count);
	wake_up_interruptible_all(&comm_ctx->r_task.waitq);
}

static void
host1x_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct syncpt_t *syncpt = container_of(cb, struct syncpt_t, fence_cb);

	schedule_work(&syncpt->work);
}

static int
allocate_fence(struct syncpt_t *syncpt)
{
	int ret = 0;
	struct dma_fence *fence = NULL;

	fence = host1x_fence_create(syncpt->sp, ++syncpt->threshold, false);
	if (IS_ERR_OR_NULL(fence)) {
		ret = fence ? PTR_ERR(fence) : -ENOMEM;
		pr_err("host1x_fence_create failed with: %d\n", ret);
		return ret;
	}

	mutex_lock(&syncpt->lock);
	ret = dma_fence_add_callback(fence, &syncpt->fence_cb, host1x_cb_func);
	if (ret != 0) {
		/* If already expired. */
		if (ret == -ENOENT) {
			ret = 0;
			schedule_work(&syncpt->work);
		}
		goto put_fence;
	}
	syncpt->fence = fence;
	mutex_unlock(&syncpt->lock);

	return ret;

put_fence:
	dma_fence_put(fence);
	mutex_unlock(&syncpt->lock);
	return ret;
}

static void
fence_do_work(struct work_struct *work)
{
	int ret = 0;
	struct syncpt_t *syncpt = container_of(work, struct syncpt_t, work);

	if (syncpt->notifier)
		syncpt->notifier(syncpt->notifier_data);

	mutex_lock(&syncpt->lock);
	/* If deinit was triggered, no need to proceed. */
	if (syncpt->fence_release) {
		mutex_unlock(&syncpt->lock);
		return;
	}

	if (syncpt->fence) {
		dma_fence_put(syncpt->fence);
		syncpt->fence = NULL;
	}
	mutex_unlock(&syncpt->lock);

	ret = allocate_fence(syncpt);
	if (ret != 0) {
		pr_err("allocate_fence failed with: %d\n", ret);
		return;
	}
}
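
/*
 * Notification re-arm cycle: allocate_fence() creates a host1x fence one
 * increment ahead of the last observed syncpoint value and registers
 * host1x_cb_func() on it. When the remote advances the syncpoint, the
 * fence signals, host1x_cb_func() schedules the work item, and
 * fence_do_work() first runs the notifier (syncpt_callback) and then
 * arms a fresh fence for the next increment, unless deinit has already
 * set fence_release.
 */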
static int
start_msg_handling(struct comm_channel_ctx_t *comm_ctx)
{
	int ret = 0;
	struct task_t *r_task = &comm_ctx->r_task;
	struct syncpt_t *syncpt = &comm_ctx->syncpt;

	/* start the recv msg processing task. */
	init_waitqueue_head(&r_task->waitq);
	init_completion(&r_task->shutdown_compl);
	r_task->shutdown = false;
	r_task->task = kthread_run(recv_taskfn, comm_ctx,
				   "comm-channel-recv-task");
	if (IS_ERR_OR_NULL(r_task->task)) {
		pr_err("Failed to create comm channel recv task\n");
		return PTR_ERR(r_task->task);
	}
	r_task->created = true;

	/* enable syncpt notifications handling. */
	mutex_init(&syncpt->lock);
	syncpt->threshold = host1x_syncpt_read(syncpt->sp);
	syncpt->notifier = syncpt_callback;
	syncpt->notifier_data = comm_ctx;
	INIT_WORK(&syncpt->work, fence_do_work);
	syncpt->fence_release = false;
	ret = allocate_fence(syncpt);
	if (ret != 0) {
		pr_err("allocate_fence failed with: %d\n", ret);
		return ret;
	}

	return ret;
}

static int
stop_msg_handling(struct comm_channel_ctx_t *comm_ctx)
{
	int ret = 0;
	struct task_t *r_task = NULL;
	struct syncpt_t *syncpt = NULL;

	if (!comm_ctx)
		return ret;

	syncpt = &comm_ctx->syncpt;
	r_task = &comm_ctx->r_task;
	if (r_task->created) {
		/* Remove the dma fence callback. */
		mutex_lock(&syncpt->lock);
		syncpt->fence_release = true;
		if (syncpt->fence) {
			ret = dma_fence_remove_callback(syncpt->fence,
							&syncpt->fence_cb);
			if (ret) {
				/*
				 * dma_fence_remove_callback() returning true
				 * means the callback was removed before the
				 * fence signaled. Cancel the fence to drop
				 * its refcount.
				 */
				host1x_fence_cancel(syncpt->fence);
			}
			dma_fence_put(syncpt->fence);
			syncpt->fence = NULL;
		}
		mutex_unlock(&syncpt->lock);
		cancel_work_sync(&syncpt->work);
		mutex_destroy(&syncpt->lock);

		/*
		 * initiate stop.
		 * we do not use kthread_stop(), but wait on this.
		 */
		r_task->shutdown = true;
		wake_up_interruptible(&r_task->waitq);
		ret = wait_for_completion_interruptible(&r_task->shutdown_compl);
		if (ret)
			pr_err("Failed to wait for completion\n");
		r_task->created = false;
	}

	return ret;
}

static void
free_syncpoint(struct comm_channel_ctx_t *comm_ctx)
{
	struct syncpt_t *syncpt = NULL;

	if (!comm_ctx)
		return;

	syncpt = &comm_ctx->syncpt;

	if (syncpt->peer_mem.pva) {
		iounmap(syncpt->peer_mem.pva);
		syncpt->peer_mem.pva = NULL;
	}

	if (syncpt->mapped_iova) {
		pci_client_unmap_addr(comm_ctx->pci_client_h,
				      syncpt->iova, syncpt->size);
		syncpt->mapped_iova = false;
	}

	if (syncpt->iova_block_h) {
		pci_client_free_iova(comm_ctx->pci_client_h,
				     &syncpt->iova_block_h);
		syncpt->iova_block_h = NULL;
	}

	if (syncpt->sp) {
		host1x_syncpt_put(syncpt->sp);
		syncpt->sp = NULL;
	}
}
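
/*
 * Inbound notifications: allocate_syncpoint() reserves a host1x syncpoint,
 * takes the physical address of its shim page (get_syncpt_shim_offset())
 * and maps it into the peer-visible iova region managed by pci-client.
 * The peer is expected to notify us by writing into that window over PCIe,
 * advancing the syncpoint that the fence/work machinery above waits on.
 * peer_mem is the mirror-image mapping: the ioremap()ed aperture through
 * which send_msg() writes to the peer's shim, assuming both SoCs lay out
 * these offsets identically.
 */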
static int
allocate_syncpoint(struct comm_channel_ctx_t *comm_ctx)
{
	int ret = 0;
	int prot = 0;
	struct host1x *host1x = NULL;
	struct syncpt_t *syncpt = NULL;
	size_t offsetof = 0x0;

	syncpt = &comm_ctx->syncpt;

	host1x = platform_get_drvdata(comm_ctx->host1x_pdev);
	if (!host1x) {
		pr_err("Host1x handle is null.");
		return -EINVAL;
	}

	syncpt->sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED,
					 "nvscic2c-pcie-comm-ch");
	if (IS_ERR_OR_NULL(syncpt->sp)) {
		ret = -ENOMEM;
		pr_err("Failed to reserve comm notify syncpt\n");
		goto err;
	}
	syncpt->id = host1x_syncpt_id(syncpt->sp);

	/* physical address of syncpoint shim. */
	syncpt->phy_addr = get_syncpt_shim_offset(syncpt->id);
	syncpt->size = SP_MAP_SIZE;

	/* reserve iova with the iova manager. */
	ret = pci_client_alloc_iova(comm_ctx->pci_client_h, syncpt->size,
				    &syncpt->iova, &offsetof,
				    &syncpt->iova_block_h);
	if (ret) {
		pr_err("Err reserving comm syncpt iova region of size: 0x%lx\n",
		       syncpt->size);
		goto err;
	}

	/* map the pages to the reserved iova. */
	prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE);
	ret = pci_client_map_addr(comm_ctx->pci_client_h, syncpt->iova,
				  syncpt->phy_addr, syncpt->size, prot);
	if (ret) {
		pr_err("Err mapping comm SP physical addr to reserved iova\n");
		goto err;
	}
	syncpt->mapped_iova = true;

	pr_debug("mapped phy:0x%pa[p]+0x%lx to iova:0x%llx\n",
		 &syncpt->phy_addr, syncpt->size, syncpt->iova);

	/*
	 * get peer's aperture offset. Map tx (pcie aper for notif tx.)
	 * for peer's access of comm-syncpt, it is assumed offsets are
	 * same on both SoC.
	 */
	syncpt->peer_mem.size = syncpt->size;
	ret = pci_client_get_peer_aper(comm_ctx->pci_client_h, offsetof,
				       syncpt->peer_mem.size,
				       &syncpt->peer_mem.aper);
	if (ret) {
		pr_err("Failed to get comm peer's syncpt aperture\n");
		goto err;
	}
	syncpt->peer_mem.pva = ioremap(syncpt->peer_mem.aper,
				       syncpt->peer_mem.size);
	if (!syncpt->peer_mem.pva) {
		ret = -ENOMEM;
		pr_err("Failed to ioremap comm peer's syncpt pcie aperture\n");
		goto err;
	}

	return ret;

err:
	free_syncpoint(comm_ctx);
	return ret;
}

static void
free_fifo_memory(struct comm_channel_ctx_t *comm_ctx)
{
	struct fifo_t *fifo = NULL;

	if (!comm_ctx)
		return;

	fifo = &comm_ctx->fifo;

	if (fifo->local_hdr) {
		kfree((void *)fifo->local_hdr);
		fifo->local_hdr = NULL;
	}

	if (fifo->peer_mem.pva) {
		iounmap(fifo->peer_mem.pva);
		fifo->peer_mem.pva = NULL;
	}

	if (fifo->mapped_iova) {
		pci_client_unmap_addr(comm_ctx->pci_client_h,
				      fifo->iova, fifo->self_mem.size);
		fifo->mapped_iova = false;
	}

	if (fifo->iova_block_h) {
		pci_client_free_iova(comm_ctx->pci_client_h,
				     &fifo->iova_block_h);
		fifo->iova_block_h = NULL;
	}

	if (fifo->self_mem.pva) {
		free_pages_exact(fifo->self_mem.pva, fifo->self_mem.size);
		fifo->self_mem.pva = NULL;
	}

	mutex_destroy(&fifo->send_lock);
}
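
/*
 * FIFO memory layout (one such region per side, page aligned):
 *
 *   offset 0x00                 : struct header (wr_count, rd_count, pad)
 *   offset sizeof(struct header): frame[0] .. frame[nframes - 1], each
 *                                 COMM_CHANNEL_FRAME_SZ bytes
 *
 * The local region (self_mem) is allocated with alloc_pages_exact() and
 * mapped into the peer-visible iova so the peer can write frames and its
 * counters straight into it; the peer's region is reached through the
 * ioremap()ed aperture (peer_mem). local_hdr is a plain kernel copy of
 * our own counters so the send/receive paths never read them back from
 * the peer aperture.
 */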
static int
allocate_fifo_memory(struct comm_channel_ctx_t *comm_ctx)
{
	int ret = 0;
	int prot = 0;
	size_t offsetof = 0x0;
	struct fifo_t *fifo = &comm_ctx->fifo;

	mutex_init(&fifo->send_lock);

	/* memory size includes frames and header. */
	fifo->nframes = COMM_CHANNEL_NFRAMES;
	fifo->frame_sz = COMM_CHANNEL_FRAME_SZ;
	fifo->self_mem.size = (fifo->nframes * fifo->frame_sz);
	fifo->self_mem.size += sizeof(struct header);
	fifo->self_mem.size = ALIGN(fifo->self_mem.size, PAGE_SIZE);
	fifo->self_mem.pva = alloc_pages_exact(fifo->self_mem.size,
					       (GFP_KERNEL | __GFP_ZERO));
	if (!fifo->self_mem.pva) {
		pr_err("Error allocating fifo contiguous pages: (%lu)\n",
		       (fifo->self_mem.size >> PAGE_SHIFT));
		return -ENOMEM;
	}
	fifo->self_mem.phys_addr =
		page_to_phys(virt_to_page(fifo->self_mem.pva));

	/* reserve iova for stitching comm channel memory for peer access. */
	ret = pci_client_alloc_iova(comm_ctx->pci_client_h,
				    fifo->self_mem.size, &fifo->iova,
				    &offsetof, &fifo->iova_block_h);
	if (ret) {
		pr_err("Failed reserving fifo iova region of size: 0x%lx\n",
		       fifo->self_mem.size);
		goto err;
	}

	/* map the pages to the reserved iova. */
	prot = (IOMMU_CACHE | IOMMU_READ | IOMMU_WRITE);
	ret = pci_client_map_addr(comm_ctx->pci_client_h, fifo->iova,
				  fifo->self_mem.phys_addr,
				  fifo->self_mem.size, prot);
	if (ret) {
		pr_err("Failed to map comm fifo pages to reserved iova\n");
		goto err;
	}
	fifo->mapped_iova = true;

	pr_debug("comm fifo mapped page:0x%pa[p]+0x%lx to iova:0x%llx\n",
		 &fifo->self_mem.phys_addr, fifo->self_mem.size, fifo->iova);

	/*
	 * for peer's access of the comm-fifo, it is assumed offsets are
	 * the same on both SoCs.
	 */
	fifo->peer_mem.size = fifo->self_mem.size;
	ret = pci_client_get_peer_aper(comm_ctx->pci_client_h, offsetof,
				       fifo->peer_mem.size,
				       &fifo->peer_mem.aper);
	if (ret) {
		pr_err("Failed to get comm peer's fifo aperture\n");
		goto err;
	}
	fifo->peer_mem.pva = ioremap(fifo->peer_mem.aper, fifo->peer_mem.size);
	if (!fifo->peer_mem.pva) {
		ret = -ENOMEM;
		pr_err("Failed to ioremap peer's comm fifo aperture\n");
		goto err;
	}

	/* allocate local header. */
	fifo->local_hdr = kzalloc(sizeof(*fifo->local_hdr), GFP_KERNEL);
	if (WARN_ON(!fifo->local_hdr)) {
		ret = -ENOMEM;
		goto err;
	}

	fifo->recv_hdr = (struct header *)(fifo->self_mem.pva);
	fifo->send_hdr = (__force struct header *)(fifo->peer_mem.pva);
	fifo->recv = ((u8 *)fifo->recv_hdr + sizeof(struct header));
	fifo->send = ((u8 *)fifo->send_hdr + sizeof(struct header));

	return ret;

err:
	free_fifo_memory(comm_ctx);
	return ret;
}
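
/*
 * Typical usage by the rest of the driver (illustrative sketch only:
 * error handling and the message-specific fields of struct comm_msg,
 * defined in comm-channel.h, are omitted; my_cb/my_ctx are placeholders
 * for the caller's handler):
 *
 *	void *comm_h = NULL;
 *	struct callback_ops ops = { .callback = my_cb, .ctx = my_ctx };
 *	struct comm_msg msg = { .type = COMM_MSG_TYPE_REGISTER };
 *
 *	comm_channel_init(drv_ctx, &comm_h);
 *	comm_channel_register_msg_cb(comm_h, COMM_MSG_TYPE_REGISTER, &ops);
 *	comm_channel_msg_send(comm_h, &msg); // -ENOLINK unless link is up
 *	...
 *	comm_channel_deinit(&comm_h);
 */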
int
comm_channel_init(struct driver_ctx_t *drv_ctx, void **comm_channel_h)
{
	int ret = 0;
	struct comm_channel_ctx_t *comm_ctx = NULL;

	if (WARN_ON(sizeof(struct comm_msg) > COMM_CHANNEL_FRAME_SZ))
		return -EINVAL;

	/* should not be already instantiated. */
	if (WARN_ON(!drv_ctx || !comm_channel_h || *comm_channel_h))
		return -EINVAL;

	/* start by allocating the comm ctx. */
	comm_ctx = kzalloc(sizeof(*comm_ctx), GFP_KERNEL);
	if (WARN_ON(!comm_ctx))
		return -ENOMEM;

	mutex_init(&comm_ctx->cb_ops_lock);
	atomic_set(&comm_ctx->recv_count, 0);
	comm_ctx->pci_client_h = drv_ctx->pci_client_h;
	comm_ctx->of_node = drv_ctx->drv_param.of_node;
	comm_ctx->host1x_pdev = drv_ctx->drv_param.host1x_pdev;

	/*
	 * allocate fifo area, make it visible to peer. Assume the same
	 * aperture for peer access too.
	 */
	ret = allocate_fifo_memory(comm_ctx);
	if (ret)
		goto err;

	/*
	 * allocate notification for the comm-channel. Assume the same
	 * aperture for peer access too.
	 */
	ret = allocate_syncpoint(comm_ctx);
	if (ret)
		goto err;

	/* we can now wait for notifications/messages to be received. */
	ret = start_msg_handling(comm_ctx);
	if (ret)
		goto err;

	*comm_channel_h = comm_ctx;

	return ret;

err:
	comm_channel_deinit((void **)&comm_ctx);
	return ret;
}

void
comm_channel_deinit(void **comm_channel_h)
{
	struct comm_channel_ctx_t *comm_ctx =
		(struct comm_channel_ctx_t *)(*comm_channel_h);

	if (!comm_ctx)
		return;

	stop_msg_handling(comm_ctx);
	free_syncpoint(comm_ctx);
	free_fifo_memory(comm_ctx);
	mutex_destroy(&comm_ctx->cb_ops_lock);
	kfree(comm_ctx);

	*comm_channel_h = NULL;
}

int
comm_channel_register_msg_cb(void *comm_channel_h, enum comm_msg_type type,
			     struct callback_ops *ops)
{
	int ret = 0;
	struct callback_ops *cb_ops = NULL;
	struct comm_channel_ctx_t *comm_ctx =
		(struct comm_channel_ctx_t *)comm_channel_h;

	if (WARN_ON(!comm_ctx || !ops || !ops->callback))
		return -EINVAL;

	if (WARN_ON(type <= COMM_MSG_TYPE_INVALID ||
		    type >= COMM_MSG_TYPE_MAXIMUM))
		return -EINVAL;

	mutex_lock(&comm_ctx->cb_ops_lock);
	cb_ops = &comm_ctx->cb_ops[type];
	if (cb_ops->callback) {
		pr_err("Callback for msg type: (%u) is already taken\n", type);
		ret = -EBUSY;
	} else {
		cb_ops->callback = ops->callback;
		cb_ops->ctx = ops->ctx;
	}
	mutex_unlock(&comm_ctx->cb_ops_lock);

	return ret;
}

int
comm_channel_unregister_msg_cb(void *comm_channel_h, enum comm_msg_type type)
{
	int ret = 0;
	struct callback_ops *cb_ops = NULL;
	struct comm_channel_ctx_t *comm_ctx =
		(struct comm_channel_ctx_t *)comm_channel_h;

	if (WARN_ON(!comm_ctx))
		return -EINVAL;

	if (WARN_ON(type <= COMM_MSG_TYPE_INVALID ||
		    type >= COMM_MSG_TYPE_MAXIMUM))
		return -EINVAL;

	mutex_lock(&comm_ctx->cb_ops_lock);
	cb_ops = &comm_ctx->cb_ops[type];
	cb_ops->callback = NULL;
	cb_ops->ctx = NULL;
	mutex_unlock(&comm_ctx->cb_ops_lock);

	return ret;
}