Files
linux-nv-oot/drivers/misc/nvscic2c-pcie/stream-extensions.c
Jian-Min Liu 719d3fa024 nvidia-oot: use TEGRA_SYSTEM_TYPE_ACK as ack macro
Use config TEGRA_SYSTEM_TYPE_ACK to control kernel builds only
based on the system type, which also aligns with the Makefile usage.

Bug 4223187

Change-Id: I276208d180d1b3459eccb80d8fdb818f4507d3df
Signed-off-by: Jian-Min Liu <jianminl@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2966713
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2023-08-24 23:32:04 -07:00


// SPDX-License-Identifier: GPL-2.0-only
// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#define pr_fmt(fmt) "nvscic2c-pcie: stream-ext: " fmt
#include <linux/anon_inodes.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/host1x-next.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/tegra-pcie-edma.h>
#include <linux/version.h>
#include <uapi/misc/nvscic2c-pcie-ioctl.h>
#include "comm-channel.h"
#include "common.h"
#include "descriptor.h"
#include "module.h"
#include "pci-client.h"
#include "stream-extensions.h"
#include "vmap.h"
#if defined(CONFIG_TEGRA_SYSTEM_TYPE_ACK)
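/* ACK kernels export the VFS internals used here (e.g. close_fd()) under this namespace. */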
MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);
#endif
/* forward declarations.*/
struct stream_ext_ctx_t;
struct stream_ext_obj;
/* limits as set for copy requests.*/
struct copy_req_limits {
u64 max_copy_requests;
u64 max_flush_ranges;
u64 max_post_fences;
};
/*
* Copy of the submit-copy args from user-space. These are then parsed and
* validated. This copy is required as the args contain pointers to
* user-space areas which must be copied into kernel-space before use. On
* the subsequent copy, basic checks are done and the result shall be used
* to create a copy request payload for eDMA.
*/
struct copy_req_params {
u64 num_local_post_fences;
s32 *local_post_fences;
u64 num_remote_post_fences;
s32 *remote_post_fences;
u64 num_flush_ranges;
u64 *remote_post_fence_values;
struct nvscic2c_pcie_flush_range *flush_ranges;
};
/* one copy request.*/
struct copy_request {
/* book-keeping for copy completion.*/
struct list_head node;
/*
* back-reference to stream_ext_context, used in eDMA callback.
* to add this copy_request back in free_list for reuse. Also,
* the host1x_pdev in ctx is used via this ctx in the callback.
*/
struct stream_ext_ctx_t *ctx;
/*
* actual number of handles per the submit-copy request.
* Shall include ((2 * num_flush_range) + num_local_post_fences
* + num_remote_post_fences).
* used for refcounting: out of order free and copy.
*/
u64 num_handles;
/*
* space for num_handles considering worst-case allocation:
* ((2 * max_flush_ranges) + (max_post_fences)).
*/
struct stream_ext_obj **handles;
/*
* actual number of edma-desc per the submit-copy request.
* Shall include (num_flush_range).
*/
u64 num_edma_desc;
/*
* space for num_edma_desc considering worst-case allocation:
* (max_flush_ranges).
*/
struct tegra_pcie_edma_desc *edma_desc;
/*
* actual number of local_post-fences per the submit-copy request.
* Shall include (num_local_post_fences).
*/
u64 num_local_post_fences;
u64 num_remote_post_fences;
u64 num_remote_buf_objs;
/*
* space for num_local_post_fences considering worst-case allocation:
* max_post_fences, assuming submit-copy could have all post-fences for
* local signalling.
*/
struct stream_ext_obj **local_post_fences;
/*
* space for num_remote_post_fences considering worst-case allocation:
* max_post_fences, assuming submit-copy could have all post-fences for
* remote signalling.
*/
struct stream_ext_obj **remote_post_fences;
/*
* space for num_remote_buf_objs considering worst-case allocation:
* max_flush_ranges, assuming submit-copy could have all flush ranges for
* transfers.
*/
struct stream_ext_obj **remote_buf_objs;
/* X86 uses semaphores for fences; they need to be written with the
* NvSciStream-provided value.
*/
u64 *remote_post_fence_values;
enum peer_cpu_t peer_cpu;
};
struct stream_ext_obj {
/* book-keeping for cleanup.*/
struct list_head node;
s32 handle;
/* back-reference to vmap handle, required during free/unmap.*/
void *vmap_h;
/* for correctness check. */
enum nvscic2c_pcie_obj_type type;
u32 soc_id;
u32 cntrlr_id;
u32 ep_id;
/* for ordering out of order copy and free ops.*/
bool marked_for_del;
struct kref refcount;
/* virtual mapping information.*/
struct vmap_obj_attributes vmap;
/*
* ImportObj only.
* Add offsetof from peer window to local aper base for access
* by local eDMA or CPU(mmap) towards peer obj.
* - For PCIe RP.
* Add offsetof from peer window to local aper base for access by
* CPU(mmap) towards peer obj; eDMA will use the iova directly.
* - For PCIe EP.
*/
u32 import_type;
phys_addr_t aper;
/* Mapping for ImportObj for CPU Read/Write.*/
void __iomem *import_obj_map;
};
/* stream extensions context per NvSciC2cPcie endpoint.*/
struct stream_ext_ctx_t {
/*
* mode - EPC(on PCIe RP) or EPF(on PCIe EP).
* Destination address of eDMA descriptor is different for these
* two modes.
*/
enum drv_mode_t drv_mode;
u32 ep_id;
char ep_name[NAME_MAX];
struct node_info_t local_node;
struct node_info_t peer_node;
/* for local post fence increment ops.*/
struct platform_device *host1x_pdev;
struct host1x *host1x;
/* vmap abstraction.*/
void *vmap_h;
/* tegra-pcie-edma cookie.*/
void *edma_h;
/* comm-channel abstraction. */
void *comm_channel_h;
/* pci client abstraction. */
void *pci_client_h;
/* max copy request limits as set by user.*/
struct copy_req_limits cr_limits;
/* Intermediate validated and copied user-args for submit-copy ioctl.*/
struct copy_req_params cr_params;
/* Async copy: book-keeping copy-requests: free and in-progress.*/
struct list_head free_list;
/* guard free_list.*/
struct mutex free_lock;
atomic_t transfer_count;
wait_queue_head_t transfer_waitq;
/* allocated stream obj list for book-keeping.*/
struct list_head obj_list;
};
static int
cache_copy_request_handles(struct copy_req_params *params,
struct copy_request *cr);
static int
release_copy_request_handles(struct copy_request *cr);
static void
signal_local_post_fences(struct copy_request *cr);
static void
signal_remote_post_fences(struct copy_request *cr);
static int
prepare_edma_desc(enum drv_mode_t drv_mode, struct copy_req_params *params,
struct tegra_pcie_edma_desc *desc, u64 *num_desc);
static edma_xfer_status_t
schedule_edma_xfer(void *edma_h, void *priv, u64 num_desc,
struct tegra_pcie_edma_desc *desc);
static void
callback_edma_xfer(void *priv, edma_xfer_status_t status,
struct tegra_pcie_edma_desc *desc);
static int
validate_handle(struct stream_ext_ctx_t *ctx, s32 handle,
enum nvscic2c_pcie_obj_type type);
static int
allocate_handle(struct stream_ext_ctx_t *ctx,
enum nvscic2c_pcie_obj_type type,
void *ioctl_args);
static int
copy_args_from_user(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_submit_copy_args *args,
struct copy_req_params *params);
static int
allocate_copy_request(struct stream_ext_ctx_t *ctx,
struct copy_request **copy_request);
static void
free_copy_request(struct copy_request **copy_request);
static int
allocate_copy_req_params(struct stream_ext_ctx_t *ctx,
struct copy_req_params *params);
static void
free_copy_req_params(struct copy_req_params *params);
static int
validate_copy_req_params(struct stream_ext_ctx_t *ctx,
struct copy_req_params *params);
static int
fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
int ret = 0;
u64 memaddr = 0x0;
u64 memsize = 0x0;
struct stream_ext_obj *stream_obj = NULL;
if (WARN_ON(!filep))
return -EFAULT;
if (WARN_ON(!filep->private_data))
return -EFAULT;
if (WARN_ON(!(vma)))
return -EFAULT;
/* read access of import sync object would mean poll over pcie.*/
if (WARN_ON(vma->vm_flags & VM_READ))
return -EINVAL;
stream_obj = (struct stream_ext_obj *)(filep->private_data);
if (WARN_ON(stream_obj->type != NVSCIC2C_PCIE_OBJ_TYPE_IMPORT))
return -EOPNOTSUPP;
if (WARN_ON(stream_obj->import_type != STREAM_OBJ_TYPE_SYNC))
return -EOPNOTSUPP;
if (WARN_ON(stream_obj->marked_for_del))
return -EINVAL;
memsize = stream_obj->vmap.size;
memaddr = stream_obj->aper;
vma->vm_pgoff = 0;
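/* newer ACK kernels (6.1+) disallow direct writes to vma->vm_flags; the vm_flags_set() helper must be used instead. */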
#if defined(CONFIG_TEGRA_SYSTEM_TYPE_ACK) && (LINUX_VERSION_CODE >= KERNEL_VERSION(6, 1, 0))
vm_flags_set(vma, VM_DONTCOPY);
#else
vma->vm_flags |= (VM_DONTCOPY);
#endif
vma->vm_page_prot = pgprot_device(vma->vm_page_prot);
ret = remap_pfn_range(vma, vma->vm_start, PFN_DOWN(memaddr),
memsize, vma->vm_page_prot);
if (ret)
pr_err("mmap() failed for Imported sync object\n");
return ret;
}
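/*
* A minimal user-space sketch (hypothetical names) of mapping an imported
* sync object via the fops above. Only write mappings are accepted;
* PROT_READ is rejected to avoid polls over PCIe:
*
*   void *sem = mmap(NULL, size, PROT_WRITE, MAP_SHARED, import_handle, 0);
*/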
static void
streamobj_free(struct kref *kref)
{
struct stream_ext_obj *stream_obj = NULL;
if (!kref)
return;
stream_obj = container_of(kref, struct stream_ext_obj, refcount);
if (stream_obj) {
if (stream_obj->import_obj_map)
iounmap(stream_obj->import_obj_map);
vmap_obj_unmap(stream_obj->vmap_h, stream_obj->vmap.type,
stream_obj->vmap.id);
kfree(stream_obj);
}
}
static int
fops_release(struct inode *inode, struct file *filep)
{
struct stream_ext_obj *stream_obj =
(struct stream_ext_obj *)(filep->private_data);
if (!stream_obj)
return 0;
/*
* actual free happens when the refcount reaches zero. This is done to
* accommodate the out-of-order free while a copy is in progress.
*/
list_del(&stream_obj->node);
stream_obj->marked_for_del = true;
kref_put(&stream_obj->refcount, streamobj_free);
filep->private_data = NULL;
return 0;
}
/* for all stream objs - Local, remote + Mem, Sync, Import*/
static const struct file_operations fops_default = {
.owner = THIS_MODULE,
.release = fops_release,
.mmap = fops_mmap,
};
/* implement NVSCIC2C_PCIE_IOCTL_FREE ioctl call. */
static int
ioctl_free_obj(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_free_obj_args *args)
{
int ret = 0;
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
/* validate the input handle for correctness.*/
ret = validate_handle(ctx, args->handle, args->obj_type);
if (ret)
return ret;
filep = fget(args->handle);
stream_obj = filep->private_data;
filep->private_data = NULL;
fput(filep);
if (stream_obj) {
list_del(&stream_obj->node);
stream_obj->marked_for_del = true;
kref_put(&stream_obj->refcount, streamobj_free);
}
/* this shall close the handle: resulting in fops_release().*/
close_fd(args->handle);
return 0;
}
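/*
* A user-space sketch (hypothetical handle) of freeing a stream object via
* the ioctl above:
*
*   struct nvscic2c_pcie_free_obj_args free_args = {
*           .handle   = src_mem_handle,
*           .obj_type = NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM,
*   };
*   ioctl(ep_fd, NVSCIC2C_PCIE_IOCTL_FREE, &free_args);
*/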
/* implement NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN call. */
static int
ioctl_export_obj(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_export_obj_args *args)
{
int ret = 0;
u64 exp_desc = 0;
struct comm_msg msg = {0};
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
struct node_info_t *peer = &ctx->peer_node;
enum vmap_obj_type export_type = STREAM_OBJ_TYPE_MEM;
/* validate the input handle for correctness.*/
ret = validate_handle(ctx, args->in.handle, args->obj_type);
if (ret)
return ret;
/* only target/remote can be exported.*/
if (args->obj_type == NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM)
export_type = STREAM_OBJ_TYPE_MEM;
else if (args->obj_type == NVSCIC2C_PCIE_OBJ_TYPE_REMOTE_SYNC)
export_type = STREAM_OBJ_TYPE_SYNC;
else
return -EINVAL;
filep = fget(args->in.handle);
if (!filep) {
pr_err("filep is NULL\n");
return -EINVAL;
}
stream_obj = filep->private_data;
/*
* take a reference to the virtual mapping. The reference shall be
* released by the peer when the peer unregisters its corresponding
* imported obj. This happens via comm-channel.
*
* reference count of stream_obj is not taken; it is a valid scenario to
* free the exported obj from this SoC while the virtual mapping
* continues to exist, released only when the peer SoC releases its
* corresponding import stream obj.
*/
ret = vmap_obj_getref(stream_obj->vmap_h, stream_obj->vmap.type,
stream_obj->vmap.id);
if (ret) {
pr_err("(%s): Failed ref counting an object\n", ctx->ep_name);
fput(filep);
return ret;
}
/* generate export desc.*/
peer = &ctx->peer_node;
exp_desc = gen_desc(peer->board_id, peer->soc_id, peer->cntrlr_id,
ctx->ep_id, export_type, stream_obj->vmap.id);
/* share it with the peer for the corresponding import. */
pr_debug("Exporting descriptor = (%llu)\n", exp_desc);
msg.type = COMM_MSG_TYPE_REGISTER;
msg.u.reg.export_desc = exp_desc;
msg.u.reg.iova = stream_obj->vmap.iova;
msg.u.reg.size = stream_obj->vmap.size;
msg.u.reg.offsetof = stream_obj->vmap.offsetof;
ret = comm_channel_msg_send(ctx->comm_channel_h, &msg);
if (ret)
vmap_obj_putref(stream_obj->vmap_h, stream_obj->vmap.type,
stream_obj->vmap.id);
else
args->out.desc = exp_desc;
fput(filep);
return ret;
}
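/*
* Export/import handshake sketch: the exporting application obtains
* args->out.desc from NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN above, ships the
* descriptor to the peer SoC application out-of-band (transport not covered
* here), and the peer passes it as args->in.desc to
* NVSCIC2C_PCIE_IOCTL_GET_HANDLE below to obtain a local import handle.
*/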
/* implement NVSCIC2C_PCIE_IOCTL_GET_HANDLE call. */
static int
ioctl_import_obj(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_import_obj_args *args)
{
int ret = 0;
s32 handle = -1;
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
struct node_info_t *local = &ctx->local_node;
enum peer_cpu_t peer_cpu;
if (args->obj_type != NVSCIC2C_PCIE_OBJ_TYPE_IMPORT)
return -EINVAL;
/* validate the incoming descriptor.*/
ret = validate_desc(args->in.desc, local->board_id, local->soc_id,
local->cntrlr_id, ctx->ep_id);
if (ret) {
pr_err("(%s): Invalid descriptor: (%llu) received\n",
ctx->ep_name, args->in.desc);
return ret;
}
/*
* Import the desc :- create virt. mapping, bind it to a stream_obj
* and create a UMD handle for this stream_obj.
*/
handle = allocate_handle(ctx, args->obj_type, (void *)args);
if (handle < 0)
return handle;
pr_debug("Imported descriptor = (%llu)\n", args->in.desc);
filep = fget(handle);
if (!filep)
return -ENOMEM;
stream_obj = filep->private_data;
stream_obj->import_type = get_handle_type_from_desc(args->in.desc);
ret = pci_client_get_peer_aper(ctx->pci_client_h, stream_obj->vmap.offsetof,
stream_obj->vmap.size, &stream_obj->aper);
if (ret) {
pr_err("(%s): PCI Client Get Peer Aper Failed\n", ctx->ep_name);
fput(filep);
return ret;
}
peer_cpu = pci_client_get_peer_cpu(ctx->pci_client_h);
if (peer_cpu == NVCPU_X86_64) {
stream_obj->import_obj_map = ioremap(stream_obj->aper,
PAGE_SIZE);
} else {
if (stream_obj->import_type == STREAM_OBJ_TYPE_SYNC) {
stream_obj->import_obj_map = ioremap(stream_obj->aper,
PAGE_SIZE);
if (WARN_ON(!stream_obj->import_obj_map)) {
fput(filep);
return -ENOMEM;
}
}
}
fput(filep);
args->out.handle = handle;
return ret;
}
/* implement NVSCIC2C_PCIE_IOCTL_MAP ioctl call. */
static int
ioctl_map_obj(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_map_obj_args *args)
{
int ret = 0;
s32 handle = -1;
/*
* Create virt. mapping for the user primitive objs - Mem or Sync.
* Bind it to a stream_obj. Create a UMD handle for this stream_obj.
*/
handle = allocate_handle(ctx, args->obj_type, (void *)args);
if (handle < 0)
return handle;
args->out.handle = handle;
return ret;
}
/* implement NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST ioctl call. */
static int
ioctl_submit_copy_request(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_submit_copy_args *args)
{
int ret = 0;
struct copy_request *cr = NULL;
edma_xfer_status_t edma_status = EDMA_XFER_FAIL_INVAL_INPUTS;
enum nvscic2c_pcie_link link = NVSCIC2C_PCIE_LINK_DOWN;
link = pci_client_query_link_status(ctx->pci_client_h);
if (link != NVSCIC2C_PCIE_LINK_UP)
return -ENOLINK;
/* copy user-supplied submit-copy args.*/
ret = copy_args_from_user(ctx, args, &ctx->cr_params);
if (ret)
return ret;
/* validate the user-supplied handles in flush_range and post-fence.*/
ret = validate_copy_req_params(ctx, &ctx->cr_params);
if (ret)
return ret;
/* get one copy-request from the free list.*/
mutex_lock(&ctx->free_lock);
if (list_empty(&ctx->free_list)) {
/*
* user submitted more outstanding requests than max_copy_requests OR
* the eDMA async callback hasn't returned yet for completed transfers.
*/
mutex_unlock(&ctx->free_lock);
return -EAGAIN;
}
cr = list_first_entry(&ctx->free_list, struct copy_request, node);
list_del(&cr->node);
mutex_unlock(&ctx->free_lock);
/*
* To support out-of-order free and copy-requests when eDMA is in async
* mode, cache all the handles from the copy-submit params and increment
* their reference count before the eDMA ops. Post eDMA, decrement the
* reference; thereby, if a free() is received for the same set of handles
* while eDMA is in progress, the handles are marked for deletion but
* don't actually get deleted.
*/
ret = cache_copy_request_handles(&ctx->cr_params, cr);
if (ret)
goto reclaim_cr;
cr->peer_cpu = pci_client_get_peer_cpu(ctx->pci_client_h);
/* generate eDMA descriptors from flush_ranges.*/
ret = prepare_edma_desc(ctx->drv_mode, &ctx->cr_params, cr->edma_desc,
&cr->num_edma_desc);
if (ret) {
release_copy_request_handles(cr);
goto reclaim_cr;
}
/* schedule asynchronous eDMA.*/
atomic_inc(&ctx->transfer_count);
edma_status = schedule_edma_xfer(ctx->edma_h, (void *)cr,
cr->num_edma_desc, cr->edma_desc);
if (edma_status != EDMA_XFER_SUCCESS) {
ret = -EIO;
atomic_dec(&ctx->transfer_count);
release_copy_request_handles(cr);
goto reclaim_cr;
}
return ret;
reclaim_cr:
mutex_lock(&ctx->free_lock);
list_add_tail(&cr->node, &ctx->free_list);
mutex_unlock(&ctx->free_lock);
return ret;
}
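/*
* A minimal user-space sketch (hypothetical handles and values; the UAPI
* pointer fields may be typed u64, casts omitted) of one submit-copy:
*
*   struct nvscic2c_pcie_flush_range range = {
*           .src_handle = src_mem_handle,
*           .dst_handle = import_mem_handle,
*           .offset     = 0,
*           .size       = 4096,
*   };
*   struct nvscic2c_pcie_submit_copy_args copy = {
*           .num_flush_ranges         = 1,
*           .flush_ranges             = &range,
*           .num_local_post_fences    = 1,
*           .local_post_fences        = &local_sync_handle,
*           .num_remote_post_fences   = 1,
*           .remote_post_fences       = &import_sync_handle,
*           .remote_post_fence_values = &fence_value,
*   };
*   ioctl(ep_fd, NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST, &copy);
*/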
/* implement NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS ioctl call. */
static int
ioctl_set_max_copy_requests(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_max_copy_args *args)
{
int ret = 0;
u32 i = 0;
struct copy_request *cr = NULL;
struct list_head *curr = NULL, *next = NULL;
if (WARN_ON(!args->max_copy_requests ||
!args->max_flush_ranges ||
!args->max_post_fences))
return -EINVAL;
/* limits already set.*/
if (WARN_ON(ctx->cr_limits.max_copy_requests ||
ctx->cr_limits.max_flush_ranges ||
ctx->cr_limits.max_post_fences))
return -EINVAL;
ctx->cr_limits.max_copy_requests = args->max_copy_requests;
ctx->cr_limits.max_flush_ranges = args->max_flush_ranges;
ctx->cr_limits.max_post_fences = args->max_post_fences;
/* allocate one submit-copy params.*/
ret = allocate_copy_req_params(ctx, &ctx->cr_params);
if (ret) {
pr_err("Failed to allocate submit-copy params\n");
goto clean_up;
}
/* allocate the maximum outstanding copy requests we can have.*/
for (i = 0; i < ctx->cr_limits.max_copy_requests; i++) {
cr = NULL;
ret = allocate_copy_request(ctx, &cr);
if (ret) {
pr_err("Failed to allocate copy request\n");
goto clean_up;
}
mutex_lock(&ctx->free_lock);
list_add(&cr->node, &ctx->free_list);
mutex_unlock(&ctx->free_lock);
}
return ret;
clean_up:
mutex_lock(&ctx->free_lock);
list_for_each_safe(curr, next, &ctx->free_list) {
cr = list_entry(curr, struct copy_request, node);
list_del(curr);
free_copy_request(&cr);
}
mutex_unlock(&ctx->free_lock);
free_copy_req_params(&ctx->cr_params);
return ret;
}
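/*
* User-space sketch (hypothetical limits) of the one-time limits setup that
* must precede any submit-copy; a second call fails with -EINVAL:
*
*   struct nvscic2c_pcie_max_copy_args max = {
*           .max_copy_requests = 8,
*           .max_flush_ranges  = 16,
*           .max_post_fences   = 4,
*   };
*   ioctl(ep_fd, NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS, &max);
*/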
int
stream_extension_ioctl(void *stream_ext_h, unsigned int cmd, void *args)
{
int ret = 0;
struct stream_ext_ctx_t *ctx = NULL;
if (WARN_ON(!stream_ext_h || !args))
return -EINVAL;
ctx = (struct stream_ext_ctx_t *)stream_ext_h;
switch (cmd) {
case NVSCIC2C_PCIE_IOCTL_MAP:
ret = ioctl_map_obj
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_map_obj_args *)args);
break;
case NVSCIC2C_PCIE_IOCTL_GET_AUTH_TOKEN:
ret = ioctl_export_obj
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_export_obj_args *)args);
break;
case NVSCIC2C_PCIE_IOCTL_GET_HANDLE:
ret = ioctl_import_obj
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_import_obj_args *)args);
break;
case NVSCIC2C_PCIE_IOCTL_FREE:
ret = ioctl_free_obj
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_free_obj_args *)args);
break;
case NVSCIC2C_PCIE_IOCTL_SUBMIT_COPY_REQUEST:
ret = ioctl_submit_copy_request
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_submit_copy_args *)args);
break;
case NVSCIC2C_PCIE_IOCTL_MAX_COPY_REQUESTS:
ret = ioctl_set_max_copy_requests
((struct stream_ext_ctx_t *)ctx,
(struct nvscic2c_pcie_max_copy_args *)args);
break;
default:
pr_err("(%s): unrecognised nvscic2c-pcie ioclt cmd: 0x%x\n",
ctx->ep_name, cmd);
ret = -ENOTTY;
break;
}
return ret;
}
int
stream_extension_init(struct stream_ext_params *params, void **stream_ext_h)
{
int ret = 0;
struct stream_ext_ctx_t *ctx = NULL;
if (WARN_ON(!params || !stream_ext_h || *stream_ext_h))
return -EINVAL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (WARN_ON(!ctx))
return -ENOMEM;
ctx->drv_mode = params->drv_mode;
ctx->ep_id = params->ep_id;
ctx->host1x_pdev = params->host1x_pdev;
ctx->edma_h = params->edma_h;
ctx->vmap_h = params->vmap_h;
ctx->pci_client_h = params->pci_client_h;
ctx->comm_channel_h = params->comm_channel_h;
strscpy(ctx->ep_name, params->ep_name, NAME_MAX);
memcpy(&ctx->local_node, params->local_node, sizeof(ctx->local_node));
memcpy(&ctx->peer_node, params->peer_node, sizeof(ctx->peer_node));
ctx->host1x = platform_get_drvdata(ctx->host1x_pdev);
if (!ctx->host1x) {
ret = -EINVAL;
pr_err("Could not get host1x handle from host1x_pdev.");
goto err;
}
/* copy operations.*/
mutex_init(&ctx->free_lock);
INIT_LIST_HEAD(&ctx->free_list);
atomic_set(&ctx->transfer_count, 0);
init_waitqueue_head(&ctx->transfer_waitq);
/* bookkeeping of stream objs. */
INIT_LIST_HEAD(&ctx->obj_list);
*stream_ext_h = (void *)ctx;
return 0;
err:
kfree(ctx);
return ret;
}
#define MAX_TRANSFER_TIMEOUT_MSEC (5000)
void
stream_extension_deinit(void **stream_ext_h)
{
long ret = 0;
struct file *filep = NULL;
struct copy_request *cr = NULL;
struct stream_ext_obj *stream_obj = NULL;
struct list_head *curr = NULL, *next = NULL;
struct stream_ext_ctx_t *ctx = (struct stream_ext_ctx_t *)*stream_ext_h;
if (!ctx)
return;
/* wait for eDMA/copy(ies) to complete/abort. */
ret =
wait_event_timeout(ctx->transfer_waitq,
!(atomic_read(&ctx->transfer_count)),
msecs_to_jiffies(MAX_TRANSFER_TIMEOUT_MSEC));
if (ret <= 0)
pr_err("(%s): timed-out waiting for eDMA callbacks to return\n",
ctx->ep_name);
mutex_lock(&ctx->free_lock);
list_for_each_safe(curr, next, &ctx->free_list) {
cr = list_entry(curr, struct copy_request, node);
list_del(curr);
free_copy_request(&cr);
}
mutex_unlock(&ctx->free_lock);
free_copy_req_params(&ctx->cr_params);
mutex_destroy(&ctx->free_lock);
/*
* clean-up the non-freed stream objs. Descriptors shall be freed when
* the application exits.
*/
list_for_each_safe(curr, next, &ctx->obj_list) {
stream_obj = list_entry(curr, struct stream_ext_obj, node);
filep = fget(stream_obj->handle);
if (filep) {
filep->private_data = NULL;
fput(filep);
}
list_del(curr);
stream_obj->marked_for_del = true;
kref_put(&stream_obj->refcount, streamobj_free);
}
kfree(ctx);
*stream_ext_h = NULL;
}
static int
allocate_handle(struct stream_ext_ctx_t *ctx, enum nvscic2c_pcie_obj_type type,
void *ioctl_args)
{
int ret = 0;
s32 handle = -1;
struct stream_ext_obj *stream_obj = NULL;
struct vmap_obj_map_params vmap_params = {0};
struct vmap_obj_attributes vmap_attrib = {0};
/* one of the two below would apply.*/
struct nvscic2c_pcie_map_obj_args *map_args =
(struct nvscic2c_pcie_map_obj_args *)ioctl_args;
struct nvscic2c_pcie_import_obj_args *import_args =
(struct nvscic2c_pcie_import_obj_args *)ioctl_args;
/* create pcie virtual mapping of the obj.*/
switch (type) {
case NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM:
vmap_params.type = VMAP_OBJ_TYPE_MEM;
vmap_params.u.memobj.mngd = VMAP_MNGD_DEV;
vmap_params.u.memobj.prot = VMAP_OBJ_PROT_READ;
vmap_params.u.memobj.fd = map_args->in.fd;
break;
case NVSCIC2C_PCIE_OBJ_TYPE_TARGET_MEM:
vmap_params.type = VMAP_OBJ_TYPE_MEM;
vmap_params.u.memobj.mngd = VMAP_MNGD_CLIENT;
vmap_params.u.memobj.prot = VMAP_OBJ_PROT_WRITE;
vmap_params.u.memobj.fd = map_args->in.fd;
break;
case NVSCIC2C_PCIE_OBJ_TYPE_LOCAL_SYNC:
vmap_params.type = VMAP_OBJ_TYPE_SYNC;
vmap_params.u.syncobj.pin_reqd = false;
vmap_params.u.syncobj.fd = map_args->in.fd;
vmap_params.u.syncobj.id = map_args->in.id;
break;
case NVSCIC2C_PCIE_OBJ_TYPE_REMOTE_SYNC:
vmap_params.type = VMAP_OBJ_TYPE_SYNC;
vmap_params.u.syncobj.pin_reqd = true;
vmap_params.u.syncobj.mngd = VMAP_MNGD_CLIENT;
vmap_params.u.syncobj.prot = VMAP_OBJ_PROT_WRITE;
vmap_params.u.syncobj.fd = map_args->in.fd;
vmap_params.u.syncobj.id = map_args->in.id;
break;
case NVSCIC2C_PCIE_OBJ_TYPE_IMPORT:
vmap_params.type = VMAP_OBJ_TYPE_IMPORT;
vmap_params.u.importobj.export_desc = import_args->in.desc;
break;
default:
pr_err("Incorrect NVSCIC2C_IOCTL_MAP params\n");
return -EINVAL;
}
ret = vmap_obj_map(ctx->vmap_h, &vmap_params, &vmap_attrib);
if (ret) {
if (ret == -EAGAIN)
pr_info("Failed to map obj of type: (%d)\n", type);
else
pr_err("Failed to map obj of type: (%d)\n", type);
return ret;
}
/* bind the pcie virt. mapping to a streaming obj.*/
stream_obj = kzalloc(sizeof(*stream_obj), GFP_KERNEL);
if (WARN_ON(!stream_obj)) {
vmap_obj_unmap(ctx->vmap_h, vmap_attrib.type, vmap_attrib.id);
return -ENOMEM;
}
/*
* allocate a UMD handle for this streaming_obj.
* O_RDWR is required only for ImportedSyncObjs mmap() from user-space.
*/
handle = anon_inode_getfd("nvscic2c-pcie-stream-ext", &fops_default,
stream_obj, (O_RDWR | O_CLOEXEC));
if (handle < 0) {
pr_err("(%s): Failed to get stream obj handle\n", ctx->ep_name);
vmap_obj_unmap(ctx->vmap_h, vmap_attrib.type, vmap_attrib.id);
kfree(stream_obj);
return -EFAULT;
}
list_add_tail(&stream_obj->node, &ctx->obj_list);
stream_obj->handle = handle;
stream_obj->vmap_h = ctx->vmap_h;
stream_obj->type = type;
stream_obj->soc_id = ctx->local_node.soc_id;
stream_obj->cntrlr_id = ctx->local_node.cntrlr_id;
stream_obj->ep_id = ctx->ep_id;
memcpy(&stream_obj->vmap, &vmap_attrib, sizeof(vmap_attrib));
kref_init(&stream_obj->refcount);
return handle;
}
static edma_xfer_status_t
schedule_edma_xfer(void *edma_h, void *priv, u64 num_desc,
struct tegra_pcie_edma_desc *desc)
{
struct tegra_pcie_edma_xfer_info info = {0};
if (WARN_ON(!num_desc || !desc))
return EDMA_XFER_FAIL_INVAL_INPUTS;
info.type = EDMA_XFER_WRITE;
info.channel_num = 0; // no use-case to use all WR channels yet.
info.desc = desc;
info.nents = num_desc;
info.complete = callback_edma_xfer;
info.priv = priv;
return tegra_pcie_edma_submit_xfer(edma_h, &info);
}
/* Callback with each async eDMA submit xfer.*/
static void
callback_edma_xfer(void *priv, edma_xfer_status_t status,
struct tegra_pcie_edma_desc *desc)
{
struct copy_request *cr = (struct copy_request *)priv;
mutex_lock(&cr->ctx->free_lock);
/* increment post fences: local and remote.*/
if (status == EDMA_XFER_SUCCESS) {
signal_remote_post_fences(cr);
signal_local_post_fences(cr);
} else {
/* eDMA xfer failed, Update eDMA error and notify user. */
(void)pci_client_set_edma_error(cr->ctx->pci_client_h,
cr->ctx->ep_id,
NVSCIC2C_PCIE_EDMA_XFER_ERROR);
}
/* releases the references of the submit-copy handles.*/
release_copy_request_handles(cr);
/* reclaim the copy_request for reuse.*/
list_add_tail(&cr->node, &cr->ctx->free_list);
mutex_unlock(&cr->ctx->free_lock);
if (atomic_dec_and_test(&cr->ctx->transfer_count))
wake_up_all(&cr->ctx->transfer_waitq);
}
static int
prepare_edma_desc(enum drv_mode_t drv_mode, struct copy_req_params *params,
struct tegra_pcie_edma_desc *desc, u64 *num_desc)
{
u32 i = 0;
int ret = 0;
u32 iter = 0;
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
struct nvscic2c_pcie_flush_range *flush_range = NULL;
*num_desc = 0;
for (i = 0; i < params->num_flush_ranges; i++) {
flush_range = &params->flush_ranges[i];
filep = fget(flush_range->src_handle);
stream_obj = filep->private_data;
desc[iter].src = (stream_obj->vmap.iova + flush_range->offset);
fput(filep);
filep = fget(flush_range->dst_handle);
stream_obj = filep->private_data;
if (drv_mode == DRV_MODE_EPC)
desc[iter].dst = stream_obj->aper;
else
desc[iter].dst = stream_obj->vmap.iova;
desc[iter].dst += flush_range->offset;
fput(filep);
desc[iter].sz = flush_range->size;
iter++;
}
*num_desc += iter;
return ret;
}
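/*
* Worked example for prepare_edma_desc() above (hypothetical addresses):
* a flush_range { .offset = 0x100, .size = 0x40 } with the source mapped at
* iova 0x80000000 and the destination aper at 0x3a000000 yields, in EPC
* mode, one descriptor { .src = 0x80000100, .dst = 0x3a000100, .sz = 0x40 }.
*/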
/* this is post eDMA path, must be done with references still taken.*/
static void
signal_local_post_fences(struct copy_request *cr)
{
u32 i = 0;
struct host1x_syncpt *sp = NULL;
struct stream_ext_obj *stream_obj = NULL;
for (i = 0; i < cr->num_local_post_fences; i++) {
stream_obj = cr->local_post_fences[i];
/*
* Use noref API to fetch host1x_syncpt as ref is already taken
* while pinning and syncpoint won't be freed until we get
* EDMA callback for all submitted jobs.
*/
sp = host1x_syncpt_get_by_id_noref(cr->ctx->host1x,
stream_obj->vmap.syncpt_id);
if (sp)
(void)host1x_syncpt_incr(sp);
}
}
static void
signal_remote_post_fences(struct copy_request *cr)
{
u32 i = 0;
struct stream_ext_obj *stream_obj = NULL;
/* X86 remote end fences are signaled through CPU */
if (cr->peer_cpu == NVCPU_X86_64) {
/* Dummy read operation is done on the imported buffer object
* to ensure coherence of data on Vidmem of GA100 dGPU, which is
* connected as an EP to X86. This is needed as Ampere architecture
* doesn't support coherence of Write after Write operation and the
* dummy read of 4 bytes ensures the data is reconciled in vid-memory
* when the consumer waiting on a sysmem semaphore is unblocked.
*/
for (i = 0; i < cr->num_remote_buf_objs; i++) {
stream_obj = cr->remote_buf_objs[i];
(void)readl(stream_obj->import_obj_map);
}
for (i = 0; i < cr->num_remote_post_fences; i++) {
stream_obj = cr->remote_post_fences[i];
writeq(cr->remote_post_fence_values[i], stream_obj->import_obj_map);
}
} else {
for (i = 0; i < cr->num_remote_post_fences; i++) {
stream_obj = cr->remote_post_fences[i];
/*
* Issue dummy pcie read to ensure all data is visible
* to remote SoC before notification is delivered.
*/
(void)readl(stream_obj->import_obj_map);
writel(0x1, stream_obj->import_obj_map);
}
}
}
static int
release_copy_request_handles(struct copy_request *cr)
{
u32 i = 0;
struct stream_ext_obj *stream_obj = NULL;
for (i = 0; i < cr->num_handles; i++) {
stream_obj = cr->handles[i];
kref_put(&stream_obj->refcount, streamobj_free);
}
return 0;
}
static int
cache_copy_request_handles(struct copy_req_params *params,
struct copy_request *cr)
{
u32 i = 0;
s32 handle = -1;
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
cr->num_handles = 0;
cr->num_local_post_fences = 0;
cr->num_remote_post_fences = 0;
cr->num_remote_buf_objs = 0;
for (i = 0; i < params->num_local_post_fences; i++) {
handle = params->local_post_fences[i];
filep = fget(handle);
stream_obj = filep->private_data;
kref_get(&stream_obj->refcount);
cr->handles[cr->num_handles] = stream_obj;
cr->num_handles++;
/* collect all local post fences separately for nvhost incr.*/
cr->local_post_fences[cr->num_local_post_fences] = stream_obj;
cr->num_local_post_fences++;
fput(filep);
}
for (i = 0; i < params->num_remote_post_fences; i++) {
handle = params->remote_post_fences[i];
filep = fget(handle);
stream_obj = filep->private_data;
kref_get(&stream_obj->refcount);
cr->handles[cr->num_handles] = stream_obj;
cr->num_handles++;
cr->remote_post_fence_values[i] = params->remote_post_fence_values[i];
cr->remote_post_fences[cr->num_remote_post_fences] = stream_obj;
cr->num_remote_post_fences++;
fput(filep);
}
for (i = 0; i < params->num_flush_ranges; i++) {
handle = params->flush_ranges[i].src_handle;
filep = fget(handle);
stream_obj = filep->private_data;
kref_get(&stream_obj->refcount);
cr->handles[cr->num_handles] = stream_obj;
cr->num_handles++;
fput(filep);
handle = params->flush_ranges[i].dst_handle;
filep = fget(handle);
stream_obj = filep->private_data;
kref_get(&stream_obj->refcount);
cr->handles[cr->num_handles] = stream_obj;
cr->num_handles++;
cr->remote_buf_objs[cr->num_remote_buf_objs] = stream_obj;
cr->num_remote_buf_objs++;
fput(filep);
}
return 0;
}
static int
validate_handle(struct stream_ext_ctx_t *ctx, s32 handle,
enum nvscic2c_pcie_obj_type type)
{
int ret = -EINVAL;
struct stream_ext_obj *stream_obj = NULL;
struct file *filep = fget(handle);
if (!filep)
goto exit;
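/* an fd belongs to this driver only if it carries our file_operations. */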
if (filep->f_op != &fops_default)
goto err;
stream_obj = filep->private_data;
if (!stream_obj)
goto err;
if (stream_obj->marked_for_del)
goto err;
if (stream_obj->soc_id != ctx->local_node.soc_id ||
stream_obj->cntrlr_id != ctx->local_node.cntrlr_id ||
stream_obj->ep_id != ctx->ep_id)
goto err;
if (stream_obj->type != type)
goto err;
/* okay.*/
ret = 0;
err:
fput(filep);
exit:
return ret;
}
static int
validate_import_handle(struct stream_ext_ctx_t *ctx, s32 handle,
u32 import_type)
{
int ret = 0;
struct stream_ext_obj *stream_obj = NULL;
struct file *filep = NULL;
ret = validate_handle(ctx, handle, NVSCIC2C_PCIE_OBJ_TYPE_IMPORT);
if (ret)
return ret;
filep = fget(handle);
stream_obj = filep->private_data;
if (stream_obj->import_type != import_type) {
fput(filep);
return -EINVAL;
}
fput(filep);
return ret;
}
static int
validate_flush_range(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_flush_range *flush_range)
{
int ret = 0;
struct file *filep = NULL;
struct stream_ext_obj *stream_obj = NULL;
if (flush_range->size <= 0)
return -EINVAL;
/* eDMA expects u32 datatype.*/
if (flush_range->size > U32_MAX)
return -EINVAL;
if (flush_range->size & 0x3)
return -EINVAL;
if (flush_range->offset & 0x3)
return -EINVAL;
ret = validate_handle(ctx, flush_range->src_handle,
NVSCIC2C_PCIE_OBJ_TYPE_SOURCE_MEM);
if (ret)
return ret;
ret = validate_import_handle(ctx, flush_range->dst_handle,
STREAM_OBJ_TYPE_MEM);
if (ret)
return ret;
filep = fget(flush_range->src_handle);
stream_obj = filep->private_data;
if ((flush_range->offset + flush_range->size) > stream_obj->vmap.size) {
fput(filep);
return -EINVAL;
}
fput(filep);
filep = fget(flush_range->dst_handle);
stream_obj = filep->private_data;
if ((flush_range->offset + flush_range->size) > stream_obj->vmap.size) {
fput(filep);
return -EINVAL;
}
fput(filep);
return 0;
}
static int
validate_copy_req_params(struct stream_ext_ctx_t *ctx,
struct copy_req_params *params)
{
u32 i = 0;
int ret = 0;
/* for each local post-fence.*/
for (i = 0; i < params->num_local_post_fences; i++) {
s32 handle = 0;
handle = params->local_post_fences[i];
ret = validate_handle(ctx, handle,
NVSCIC2C_PCIE_OBJ_TYPE_LOCAL_SYNC);
if (ret)
return ret;
}
/* for each remote post-fence.*/
for (i = 0; i < params->num_remote_post_fences; i++) {
s32 handle = 0;
handle = params->remote_post_fences[i];
ret = validate_import_handle(ctx, handle, STREAM_OBJ_TYPE_SYNC);
if (ret)
return ret;
}
/* for each flush-range.*/
for (i = 0; i < params->num_flush_ranges; i++) {
struct nvscic2c_pcie_flush_range *flush_range = NULL;
flush_range = &params->flush_ranges[i];
ret = validate_flush_range(ctx, flush_range);
if (ret)
return ret;
}
return ret;
}
static int
copy_args_from_user(struct stream_ext_ctx_t *ctx,
struct nvscic2c_pcie_submit_copy_args *args,
struct copy_req_params *params)
{
int ret = 0;
if (WARN_ON(!args->num_local_post_fences ||
!args->num_flush_ranges ||
!args->num_remote_post_fences))
return -EINVAL;
if ((args->num_local_post_fences + args->num_remote_post_fences) >
ctx->cr_limits.max_post_fences)
return -EINVAL;
if (args->num_flush_ranges > ctx->cr_limits.max_flush_ranges)
return -EINVAL;
params->num_local_post_fences = args->num_local_post_fences;
params->num_remote_post_fences = args->num_remote_post_fences;
params->num_flush_ranges = args->num_flush_ranges;
ret = copy_from_user(params->local_post_fences,
(void __user *)args->local_post_fences,
(params->num_local_post_fences * sizeof(s32)));
if (ret)
return -EFAULT;
ret = copy_from_user(params->remote_post_fences,
(void __user *)args->remote_post_fences,
(params->num_remote_post_fences * sizeof(s32)));
if (ret)
return -EFAULT;
ret = copy_from_user(params->remote_post_fence_values,
(void __user *)args->remote_post_fence_values,
(params->num_remote_post_fences * sizeof(u64)));
if (ret)
return -EFAULT;
ret = copy_from_user(params->flush_ranges,
(void __user *)args->flush_ranges,
(params->num_flush_ranges *
sizeof(struct nvscic2c_pcie_flush_range)));
if (ret)
return -EFAULT;
return 0;
}
static void
free_copy_request(struct copy_request **copy_request)
{
struct copy_request *cr = *copy_request;
if (!cr)
return;
kfree(cr->local_post_fences);
kfree(cr->remote_post_fences);
kfree(cr->remote_buf_objs);
kfree(cr->remote_post_fence_values);
kfree(cr->edma_desc);
kfree(cr->handles);
kfree(cr);
*copy_request = NULL;
}
static int
allocate_copy_request(struct stream_ext_ctx_t *ctx,
struct copy_request **copy_request)
{
int ret = 0;
struct copy_request *cr = NULL;
/*worst-case allocation for each copy request.*/
cr = kzalloc(sizeof(*cr), GFP_KERNEL);
if (WARN_ON(!cr)) {
ret = -ENOMEM;
goto err;
}
cr->ctx = ctx;
/* flush range has two handles: src, dst + all possible post_fences.*/
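/* e.g. (hypothetical limits) max_flush_ranges = 16, max_post_fences = 4: (2 * 16) + 4 = 36 slots. */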
cr->handles = kzalloc((sizeof(*cr->handles) *
((2 * ctx->cr_limits.max_flush_ranges) +
(ctx->cr_limits.max_post_fences))),
GFP_KERNEL);
if (WARN_ON(!cr->handles)) {
ret = -ENOMEM;
goto err;
}
/* edma_desc shall include flush_range.*/
cr->edma_desc = kzalloc((sizeof(*cr->edma_desc) *
ctx->cr_limits.max_flush_ranges),
GFP_KERNEL);
if (WARN_ON(!cr->edma_desc)) {
ret = -ENOMEM;
goto err;
}
/* OR all max_post_fences could be local_post_fence. */
cr->local_post_fences = kzalloc((sizeof(*cr->local_post_fences) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!cr->local_post_fences)) {
ret = -ENOMEM;
goto err;
}
cr->remote_post_fences = kzalloc((sizeof(*cr->remote_post_fences) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!cr->remote_post_fences)) {
ret = -ENOMEM;
goto err;
}
cr->remote_buf_objs = kzalloc((sizeof(*cr->remote_buf_objs) *
ctx->cr_limits.max_flush_ranges),
GFP_KERNEL);
if (WARN_ON(!cr->remote_buf_objs)) {
ret = -ENOMEM;
goto err;
}
cr->remote_post_fence_values =
kzalloc((sizeof(*cr->remote_post_fence_values) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!cr->remote_post_fence_values)) {
ret = -ENOMEM;
goto err;
}
*copy_request = cr;
return ret;
err:
free_copy_request(&cr);
return ret;
}
static void
free_copy_req_params(struct copy_req_params *params)
{
if (!params)
return;
kfree(params->flush_ranges);
params->flush_ranges = NULL;
kfree(params->local_post_fences);
params->local_post_fences = NULL;
kfree(params->remote_post_fences);
params->remote_post_fences = NULL;
kfree(params->remote_post_fence_values);
params->remote_post_fence_values = NULL;
}
static int
allocate_copy_req_params(struct stream_ext_ctx_t *ctx,
struct copy_req_params *params)
{
int ret = 0;
/*worst-case allocation for each.*/
params->flush_ranges = kzalloc((sizeof(*params->flush_ranges) *
ctx->cr_limits.max_flush_ranges),
GFP_KERNEL);
if (WARN_ON(!params->flush_ranges)) {
ret = -ENOMEM;
goto err;
}
params->local_post_fences =
kzalloc((sizeof(*params->local_post_fences) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!params->local_post_fences)) {
ret = -ENOMEM;
goto err;
}
params->remote_post_fences =
kzalloc((sizeof(*params->remote_post_fences) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!params->remote_post_fences)) {
ret = -ENOMEM;
goto err;
}
params->remote_post_fence_values =
kzalloc((sizeof(*params->remote_post_fence_values) *
ctx->cr_limits.max_post_fences),
GFP_KERNEL);
if (WARN_ON(!params->remote_post_fence_values)) {
ret = -ENOMEM;
goto err;
}
return ret;
err:
free_copy_req_params(params);
return ret;
}