diff --git a/drivers/misc/nvscic2c-pcie/endpoint.c b/drivers/misc/nvscic2c-pcie/endpoint.c index 1013d8b8..95ae50d3 100644 --- a/drivers/misc/nvscic2c-pcie/endpoint.c +++ b/drivers/misc/nvscic2c-pcie/endpoint.c @@ -40,6 +40,8 @@ enum mem_mmap_type { SELF_MEM_MMAP, /* Map Link memory segment to query link status with Peer.*/ LINK_MEM_MMAP, + /* Map eDMA error memory segment to query eDMA xfer errors.*/ + EDMA_ERR_MEM_MMAP, /* Maximum. */ MEM_MAX_MMAP, }; @@ -110,11 +112,14 @@ struct endpoint_t { /* msi irq to x86 RP */ u16 msi_irq; - /* book-keeping of peer notifications.*/ - atomic_t dataevent_count; + /* + * book-keeping of: + * peer notifications. + * PCIe link event. + * eDMA xfer error event. + */ + atomic_t event_count; - /* book-keeping of PCIe link event.*/ - atomic_t linkevent_count; u32 linkevent_id; /* propagate events when endpoint was initialized.*/ @@ -168,7 +173,7 @@ struct endpoint_drv_ctx_t { * in PCIe link status(up->down OR down->up). */ static void -link_event_callback(void *event_type, void *ctx); +event_callback(void *event_type, void *ctx); /* prototype. */ static void @@ -320,6 +325,10 @@ endpoint_fops_mmap(struct file *filp, struct vm_area_struct *vma) } ret = pci_client_mmap_link_mem(endpoint->pci_client_h, vma); goto exit; + case EDMA_ERR_MEM_MMAP: + ret = pci_client_mmap_edma_err_mem(endpoint->pci_client_h, + endpoint->minor, vma); + goto exit; default: pr_err("(%s): unrecognised mmap type: (%llu)\n", endpoint->name, mmap_type); @@ -376,13 +385,10 @@ endpoint_fops_poll(struct file *filp, poll_table *wait) /* * wake up read, write (& exception - those who want to use) fd on - * getting Link + peer notifications. + * getting Link + peer notifications + eDMA xfer error notifications. 
*/ - if (atomic_read(&endpoint->linkevent_count)) { - atomic_dec(&endpoint->linkevent_count); - mask = (__force __poll_t)(POLLPRI | POLLIN | POLLOUT); - } else if (atomic_read(&endpoint->dataevent_count)) { - atomic_dec(&endpoint->dataevent_count); + if (atomic_read(&endpoint->event_count)) { + atomic_dec(&endpoint->event_count); mask = (__force __poll_t)(POLLPRI | POLLIN | POLLOUT); } @@ -457,14 +463,16 @@ ioctl_get_info_impl(struct endpoint_t *endpoint, endpoint->self_mem.size > U32_MAX) return -EINVAL; - get_info->nframes = endpoint->nframes; - get_info->frame_size = endpoint->frame_sz; - get_info->peer.offset = (PEER_MEM_MMAP << PAGE_SHIFT); - get_info->peer.size = endpoint->peer_mem.size; - get_info->self.offset = (SELF_MEM_MMAP << PAGE_SHIFT); - get_info->self.size = endpoint->self_mem.size; - get_info->link.offset = (LINK_MEM_MMAP << PAGE_SHIFT); - get_info->link.size = PAGE_ALIGN(sizeof(enum nvscic2c_pcie_link)); + get_info->nframes = endpoint->nframes; + get_info->frame_size = endpoint->frame_sz; + get_info->peer.offset = (PEER_MEM_MMAP << PAGE_SHIFT); + get_info->peer.size = endpoint->peer_mem.size; + get_info->self.offset = (SELF_MEM_MMAP << PAGE_SHIFT); + get_info->self.size = endpoint->self_mem.size; + get_info->link.offset = (LINK_MEM_MMAP << PAGE_SHIFT); + get_info->link.size = PAGE_ALIGN(sizeof(enum nvscic2c_pcie_link)); + get_info->edma_err.offset = (EDMA_ERR_MEM_MMAP << PAGE_SHIFT); + get_info->edma_err.size = PAGE_ALIGN(sizeof(u32)); return 0; } @@ -518,8 +526,7 @@ enable_event_handling(struct endpoint_t *endpoint) * propagate link and state change events that occur after the device * is opened and not the stale ones. 
*/ - atomic_set(&endpoint->dataevent_count, 0); - atomic_set(&endpoint->linkevent_count, 0); + atomic_set(&endpoint->event_count, 0); atomic_set(&endpoint->event_handling, 1); } @@ -532,14 +539,13 @@ disable_event_handling(struct endpoint_t *endpoint) return ret; atomic_set(&endpoint->event_handling, 0); - atomic_set(&endpoint->linkevent_count, 0); - atomic_set(&endpoint->dataevent_count, 0); + atomic_set(&endpoint->event_count, 0); return ret; } static void -link_event_callback(void *data, void *ctx) +event_callback(void *data, void *ctx) { struct endpoint_t *endpoint = NULL; @@ -550,9 +556,9 @@ link_event_callback(void *data, void *ctx) endpoint = (struct endpoint_t *)(ctx); - /* notify only if the endpoint was openend.*/ + /* notify only if the endpoint was opened.*/ if (atomic_read(&endpoint->event_handling)) { - atomic_inc(&endpoint->linkevent_count); + atomic_inc(&endpoint->event_count); wake_up_interruptible_all(&endpoint->poll_waitq); } } @@ -634,13 +640,7 @@ syncpt_callback(void *data) { /* Skip args ceck, trusting host1x. 
*/ - struct endpoint_t *endpoint = (struct endpoint_t *)(data); - - /* notify only if the endpoint was openend - else drain.*/ - if (atomic_read(&endpoint->event_handling)) { - atomic_inc(&endpoint->dataevent_count); - wake_up_interruptible_all(&endpoint->poll_waitq); - } + event_callback(NULL, data); } /* @@ -992,7 +992,7 @@ create_endpoint_device(struct endpoint_drv_ctx_t *eps_ctx, } /* Register for link events.*/ - ops.callback = &(link_event_callback); + ops.callback = &(event_callback); ops.ctx = (void *)(endpoint); ret = pci_client_register_for_link_event(endpoint->pci_client_h, &ops, &endpoint->linkevent_id); diff --git a/drivers/misc/nvscic2c-pcie/epc/module.c b/drivers/misc/nvscic2c-pcie/epc/module.c index 005186d5..e969b525 100644 --- a/drivers/misc/nvscic2c-pcie/epc/module.c +++ b/drivers/misc/nvscic2c-pcie/epc/module.c @@ -3,6 +3,7 @@ #define pr_fmt(fmt) "nvscic2c-pcie: epc: " fmt +#include #include #include #include @@ -222,8 +223,13 @@ nvscic2c_pcie_epc_remove(struct pci_dev *pdev) pr_err("(%s): Error waiting for endpoints to close\n", drv_ctx->drv_name); - /* if PCIe EP SoC went away abruptly already, jump to local deinit. */ - if (!pci_device_is_present(pdev)) + /* + * Jump to local deinit if any of below condition is true: + * => if PCIe EP SoC went away abruptly already. + * => if PCIe AER received. + */ + if (!pci_device_is_present(pdev) || + atomic_read(&drv_ctx->epc_ctx->aer_received)) goto deinit; /* @@ -252,15 +258,19 @@ nvscic2c_pcie_epc_remove(struct pci_dev *pdev) /* * continue wait only if PCIe EP SoC is still there. It can * go away abruptly waiting for it's own endpoints to close. + * Also check PCIe AER not received. 
*/ - if (pci_device_is_present(pdev)) { - pr_err("(%s): Still waiting for nvscic2c-pcie-epf to close\n", - drv_ctx->drv_name); - } else { + if (!pci_device_is_present(pdev)) { pr_debug("(%s): nvscic2c-pcie-epf went away\n", drv_ctx->drv_name); break; + } else if (atomic_read(&drv_ctx->epc_ctx->aer_received)) { + pr_debug("(%s): PCIe AER received\n", + drv_ctx->drv_name); + break; } + pr_err("(%s): Still waiting for nvscic2c-pcie-epf to close\n", + drv_ctx->drv_name); } else if (timeout > 0) { pr_debug("(%s): nvscic2c-pcie-epf closed\n", drv_ctx->drv_name); @@ -280,6 +290,7 @@ deinit: pci_release_region(pdev, 0); pci_clear_master(pdev); + pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); dt_release(&drv_ctx->drv_param); @@ -324,6 +335,7 @@ nvscic2c_pcie_epc_probe(struct pci_dev *pdev, } init_completion(&epc_ctx->epf_ready_cmpl); init_completion(&epc_ctx->epf_shutdown_cmpl); + atomic_set(&epc_ctx->aer_received, 0); drv_ctx->drv_mode = DRV_MODE_EPC; drv_ctx->drv_name = name; @@ -338,6 +350,7 @@ nvscic2c_pcie_epc_probe(struct pci_dev *pdev, ret = pcim_enable_device(pdev); if (ret) goto err_enable_device; + pci_enable_pcie_error_reporting(pdev); pci_set_master(pdev); ret = pci_request_region(pdev, 0, MODULE_NAME); if (ret) @@ -477,6 +490,61 @@ err_dt_parse: return ret; } +/* + * Hot-replug is required to recover from both types of errors. + * Hence we will return PCI_ERS_RESULT_DISCONNECT in both cases. 
+ */ +static pci_ers_result_t +nvscic2c_pcie_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct driver_ctx_t *drv_ctx = NULL; + + if (WARN_ON(!pdev)) + return PCI_ERS_RESULT_DISCONNECT; + + drv_ctx = pci_get_drvdata(pdev); + if (WARN_ON(!drv_ctx)) + return PCI_ERS_RESULT_DISCONNECT; + + atomic_set(&drv_ctx->epc_ctx->aer_received, 1); + if (state == pci_channel_io_normal) { + pr_err("AER(NONFATAL) detected for dev %04x:%02x:%02x.%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + (void)pci_client_set_link_aer_error(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_AER_UNCORRECTABLE_NONFATAL); + } else { + if (state == pci_channel_io_frozen) { + pr_err("AER: FATAL detected for dev %04x:%02x:%02x.%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + } else { + pr_err("Unknow error for dev %04x:%02x:%02x.%x treat as AER: FATAL\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + } + (void)pci_client_set_link_aer_error(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_AER_UNCORRECTABLE_FATAL); + } + + /* Mark PCIe Link down and notify all subscribers. 
*/ + pci_client_change_link_status(drv_ctx->pci_client_h, + NVSCIC2C_PCIE_LINK_DOWN); + + return PCI_ERS_RESULT_DISCONNECT; +} + +static struct pci_error_handlers nvscic2c_pcie_error_handlers = { + .error_detected = nvscic2c_pcie_error_detected, +}; + MODULE_DEVICE_TABLE(pci, nvscic2c_pcie_epc_tbl); static struct pci_driver nvscic2c_pcie_epc_driver = { .name = DRIVER_NAME_EPC, @@ -484,6 +552,7 @@ static struct pci_driver nvscic2c_pcie_epc_driver = { .probe = nvscic2c_pcie_epc_probe, .remove = nvscic2c_pcie_epc_remove, .shutdown = nvscic2c_pcie_epc_remove, + .err_handler = &nvscic2c_pcie_error_handlers, }; module_pci_driver(nvscic2c_pcie_epc_driver); diff --git a/drivers/misc/nvscic2c-pcie/epf/module.c b/drivers/misc/nvscic2c-pcie/epf/module.c index 07e58de4..86e1c2cc 100644 --- a/drivers/misc/nvscic2c-pcie/epf/module.c +++ b/drivers/misc/nvscic2c-pcie/epf/module.c @@ -413,6 +413,7 @@ shutdown_msg_cb(void *data, void *ctx) return; } + atomic_set(&drv_ctx->epf_ctx->shutdown_msg_received, 1); /* schedule deinitialization of epf interfaces. */ schedule_work(&drv_ctx->epf_ctx->deinitialization_work); } @@ -460,16 +461,19 @@ deinit_work(struct work_struct *work) /* * Acknowledge @DRV_MODE_EPC that @DRV_MODE_EPF(this) endpoints are - * closed. If PCIe RP SoC went abnormally away(halt/reset/kernel oops) - * signal anyway (sending signal will not cause local SoC fault when - * PCIe RP SoC (@DRV_MODE_EPC) went abnormally away). + * closed if shutdown message was received from @DRV_MODE_EPC. + * If @DRV_MODE_EPC went abruptly or AER was generated, @DRV_MODE_EPC + * will not send shutdown message. 
*/ - msg.type = COMM_MSG_TYPE_LINK; - msg.u.link.status = NVSCIC2C_PCIE_LINK_DOWN; - ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); - if (ret) - pr_err("(%s): Failed to send LINK (DOWN) message\n", - drv_ctx->drv_name); + if (atomic_read(&drv_ctx->epf_ctx->shutdown_msg_received)) { + msg.type = COMM_MSG_TYPE_LINK; + msg.u.link.status = NVSCIC2C_PCIE_LINK_DOWN; + ret = comm_channel_ctrl_msg_send(drv_ctx->comm_channel_h, &msg); + if (ret) + pr_err("(%s): Failed to send LINK (DOWN) message\n", + drv_ctx->drv_name); + atomic_set(&drv_ctx->epf_ctx->shutdown_msg_received, 0); + } endpoints_release(&drv_ctx->endpoints_h); edma_module_deinit(drv_ctx); @@ -788,6 +792,9 @@ nvscic2c_pcie_epf_probe(struct pci_epf *epf) atomic_set(&drv_ctx->epf_ctx->epf_initialized, 0); init_waitqueue_head(&epf_ctx->core_initialized_waitq); + /* to check if shutdown message response required. */ + atomic_set(&epf_ctx->shutdown_msg_received, 0); + return ret; err_alloc_epf_ctx: diff --git a/drivers/misc/nvscic2c-pcie/module.h b/drivers/misc/nvscic2c-pcie/module.h index a477f17a..a591d294 100644 --- a/drivers/misc/nvscic2c-pcie/module.h +++ b/drivers/misc/nvscic2c-pcie/module.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +/* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ /* * Internal to nvscic2c-pcie module. 
This file is not supposed to be included @@ -83,6 +83,7 @@ struct epf_context_t { struct work_struct deinitialization_work; atomic_t core_initialized; atomic_t epf_initialized; + atomic_t shutdown_msg_received; wait_queue_head_t core_initialized_waitq; }; @@ -90,6 +91,7 @@ struct epf_context_t { struct epc_context_t { struct completion epf_ready_cmpl; struct completion epf_shutdown_cmpl; + atomic_t aer_received; }; /* diff --git a/drivers/misc/nvscic2c-pcie/pci-client.c b/drivers/misc/nvscic2c-pcie/pci-client.c index 1ee2b32d..307ae89e 100644 --- a/drivers/misc/nvscic2c-pcie/pci-client.c +++ b/drivers/misc/nvscic2c-pcie/pci-client.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -17,8 +18,6 @@ #include #include -#include - #include #include @@ -85,6 +84,8 @@ struct pci_client_t { */ void *mem_mngr_h; + /* eDMA error memory for each endpoint. */ + struct cpu_buff_t ep_edma_err_mem[MAX_LINK_EVENT_USERS]; /* * the context of DRV_MODE_EPC/DRV_MODE_EPF */ @@ -92,6 +93,47 @@ struct pci_client_t { }; +static void +free_ep_edma_err_mem(struct pci_client_t *ctx) +{ + uint32_t i = 0U; + struct cpu_buff_t *edma_err_mem = NULL; + + if (ctx != NULL) + for (i = 0U; i < MAX_LINK_EVENT_USERS; i++) { + edma_err_mem = &ctx->ep_edma_err_mem[i]; + kfree(edma_err_mem->pva); + edma_err_mem->pva = NULL; + } +} + +static int +allocate_ep_edma_err_mem(struct pci_client_t *ctx) +{ + int ret = 0; + uint32_t i = 0U; + struct cpu_buff_t *edma_err_mem = NULL; + + for (i = 0U; i < MAX_LINK_EVENT_USERS; i++) { + edma_err_mem = &ctx->ep_edma_err_mem[i]; + edma_err_mem->size = PAGE_ALIGN(sizeof(u32)); + edma_err_mem->pva = kzalloc(edma_err_mem->size, GFP_KERNEL); + if (WARN_ON(!edma_err_mem->pva)) { + ret = -ENOMEM; + goto err; + } + /* physical address to be mmap() in user-space.*/ + edma_err_mem->phys_addr = virt_to_phys(edma_err_mem->pva); + *(u32 *)edma_err_mem->pva = NVSCIC2C_PCIE_NO_ERROR; + } + + return ret; +err: + free_ep_edma_err_mem(ctx); + + return 
ret; +} + static void free_link_status_mem(struct pci_client_t *ctx) { @@ -106,6 +148,7 @@ static int allocate_link_status_mem(struct pci_client_t *ctx) { int ret = 0; + struct nvscic2c_pcie_link_mem *link_mem = NULL; struct cpu_buff_t *mem = &ctx->link_status_mem; mem->size = PAGE_ALIGN(sizeof(enum nvscic2c_pcie_link)); @@ -114,7 +157,10 @@ allocate_link_status_mem(struct pci_client_t *ctx) return -ENOMEM; atomic_set(&ctx->link_status, NVSCIC2C_PCIE_LINK_DOWN); - *((enum nvscic2c_pcie_link *)mem->pva) = NVSCIC2C_PCIE_LINK_DOWN; + link_mem = ((struct nvscic2c_pcie_link_mem *)mem->pva); + + link_mem->link_status = NVSCIC2C_PCIE_LINK_DOWN; + link_mem->aer_err = NVSCIC2C_PCIE_NO_ERROR; /* physical address to be mmap() in user-space.*/ mem->phys_addr = virt_to_phys(mem->pva); @@ -250,6 +296,10 @@ pci_client_init(struct pci_client_params *params, void **pci_client_h) if (ret) goto err; + ret = allocate_ep_edma_err_mem(ctx); + if (ret) + goto err; + /* * for mapping application objs and endpoint physical memory to remote * visible area. 
@@ -317,6 +367,7 @@ pci_client_deinit(void **pci_client_h) ctx->mem_mngr_h = NULL; } + free_ep_edma_err_mem(ctx); free_link_status_mem(ctx); mutex_destroy(&ctx->event_tbl_lock); kfree(ctx); @@ -439,6 +490,73 @@ pci_client_mmap_link_mem(void *pci_client_h, struct vm_area_struct *vma) return ret; } +/* Helper function to mmap eDMA error memory to user-space.*/ +int +pci_client_mmap_edma_err_mem(void *pci_client_h, + u32 ep_id, struct vm_area_struct *vma) +{ + int ret = 0; + struct cpu_buff_t *edma_err_mem = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!vma || !ctx)) + return -EINVAL; + + if (WARN_ON(ep_id >= MAX_LINK_EVENT_USERS)) + return -EINVAL; + + edma_err_mem = &ctx->ep_edma_err_mem[ep_id]; + if (WARN_ON(!edma_err_mem->pva)) + return -EINVAL; + + if ((vma->vm_end - vma->vm_start) != edma_err_mem->size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ret = remap_pfn_range(vma, + vma->vm_start, + PFN_DOWN(edma_err_mem->phys_addr), + edma_err_mem->size, + vma->vm_page_prot); + if (ret) + pr_err("remap_pfn_range returns error: (%d) for eDMA err mem\n", ret); + + return ret; +} + +/* Update eDMA xfer error code for endpoint ep_id and notify its registered callback.*/ +int +pci_client_set_edma_error(void *pci_client_h, u32 ep_id, u32 err) +{ + int ret = 0; + struct event_t *event = NULL; + struct callback_ops *ops = NULL; + struct cpu_buff_t *edma_err_mem = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + if (WARN_ON(ep_id >= MAX_LINK_EVENT_USERS || + err != NVSCIC2C_PCIE_EDMA_XFER_ERROR)) + return -EINVAL; + + edma_err_mem = &ctx->ep_edma_err_mem[ep_id]; + *(u32 *)edma_err_mem->pva = err; + arch_invalidate_pmem(edma_err_mem->pva, edma_err_mem->size); + + mutex_lock(&ctx->event_tbl_lock); + /* notify user. 
*/ + event = &ctx->event_tbl[ep_id]; + if (atomic_read(&event->in_use)) { + ops = &event->cb_ops; + ops->callback(NULL, ops->ctx); + } + mutex_unlock(&ctx->event_tbl_lock); + + return ret; +} + /* Query PCI link status. */ enum nvscic2c_pcie_link pci_client_query_link_status(void *pci_client_h) @@ -526,9 +644,9 @@ pci_client_change_link_status(void *pci_client_h, { u32 i = 0; int ret = 0; - struct page *page = NULL; struct event_t *event = NULL; struct callback_ops *ops = NULL; + struct nvscic2c_pcie_link_mem *link_mem = NULL; struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; if (WARN_ON(!ctx)) @@ -544,9 +662,9 @@ pci_client_change_link_status(void *pci_client_h, * Call is arm64 specific. */ atomic_set(&ctx->link_status, status); - *((enum nvscic2c_pcie_link *)ctx->link_status_mem.pva) = status; - page = virt_to_page(ctx->link_status_mem.pva); - flush_dcache_page(page); + link_mem = ((struct nvscic2c_pcie_link_mem *)ctx->link_status_mem.pva); + link_mem->link_status = status; + arch_invalidate_pmem(ctx->link_status_mem.pva, ctx->link_status_mem.size); /* interrupt registered users. */ mutex_lock(&ctx->event_tbl_lock); @@ -562,6 +680,32 @@ pci_client_change_link_status(void *pci_client_h, return ret; } +/* Update PCIe error offset with error. */ +int +pci_client_set_link_aer_error(void *pci_client_h, u32 err) +{ + int ret = 0; + struct nvscic2c_pcie_link_mem *link_mem = NULL; + struct pci_client_t *ctx = (struct pci_client_t *)pci_client_h; + + if (WARN_ON(!ctx)) + return -EINVAL; + + if (WARN_ON((err != NVSCIC2C_PCIE_AER_UNCORRECTABLE_FATAL) && + (err != NVSCIC2C_PCIE_AER_UNCORRECTABLE_NONFATAL))) + return -EINVAL; + + link_mem = ((struct nvscic2c_pcie_link_mem *)ctx->link_status_mem.pva); + /* + * There can be more than one type of AER raised before system recovery is done. + * Hence update the offset with masked error codes. 
+ */ + link_mem->aer_err |= err; + arch_invalidate_pmem(ctx->link_status_mem.pva, ctx->link_status_mem.size); + + return ret; +} + /* * Helper functions to set and get driver context from pci_client t * diff --git a/drivers/misc/nvscic2c-pcie/pci-client.h b/drivers/misc/nvscic2c-pcie/pci-client.h index 0941e216..b257bb99 100644 --- a/drivers/misc/nvscic2c-pcie/pci-client.h +++ b/drivers/misc/nvscic2c-pcie/pci-client.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +/* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #ifndef __PCI_CLIENT_H__ #define __PCI_CLIENT_H__ @@ -85,10 +85,22 @@ int pci_client_change_link_status(void *pci_client_h, enum nvscic2c_pcie_link status); +/* Update PCIe error offset with error. */ +int +pci_client_set_link_aer_error(void *pci_client_h, u32 err); + /* Helper function to mmap the PCI link status memory to user-space.*/ int pci_client_mmap_link_mem(void *pci_client_h, struct vm_area_struct *vma); +/* Helper function to mmap eDMA error memory to user-space.*/ +int +pci_client_mmap_edma_err_mem(void *pci_client_h, + u32 ep_id, struct vm_area_struct *vma); +/* Update eDMA xfer error code.*/ +int +pci_client_set_edma_error(void *pci_client_h, u32 ep_id, u32 err); + /* Query PCI link status. */ enum nvscic2c_pcie_link pci_client_query_link_status(void *pci_client_h); diff --git a/drivers/misc/nvscic2c-pcie/stream-extensions.c b/drivers/misc/nvscic2c-pcie/stream-extensions.c index 143d507d..c7fc8005 100644 --- a/drivers/misc/nvscic2c-pcie/stream-extensions.c +++ b/drivers/misc/nvscic2c-pcie/stream-extensions.c @@ -609,6 +609,9 @@ ioctl_submit_copy_request(struct stream_ext_ctx_t *ctx, ret = -EIO; atomic_dec(&ctx->transfer_count); release_copy_request_handles(cr); + /* Scheduling edma job failed. Update edma error and Notify user. 
*/ + (void)pci_client_set_edma_error(ctx->pci_client_h, ctx->ep_id, + NVSCIC2C_PCIE_EDMA_XFER_ERROR); goto reclaim_cr; } @@ -959,6 +962,11 @@ callback_edma_xfer(void *priv, edma_xfer_status_t status, if (status == EDMA_XFER_SUCCESS) { signal_remote_post_fences(cr); signal_local_post_fences(cr); + } else { + /* eDMA xfer failed, Update eDMA error and notify user. */ + (void)pci_client_set_edma_error(cr->ctx->pci_client_h, + cr->ctx->ep_id, + NVSCIC2C_PCIE_EDMA_XFER_ERROR); } /* releases the references of the cubmit-copy handles.*/ diff --git a/include/uapi/misc/nvscic2c-pcie-ioctl.h b/include/uapi/misc/nvscic2c-pcie-ioctl.h index 99aa4b7f..bee973e5 100644 --- a/include/uapi/misc/nvscic2c-pcie-ioctl.h +++ b/include/uapi/misc/nvscic2c-pcie-ioctl.h @@ -15,12 +15,24 @@ #define MAX_NAME_SZ (32) +/* Represents PCIe runtime errors reported to user space. */ +#define NVSCIC2C_PCIE_NO_ERROR (0x00U) +#define NVSCIC2C_PCIE_EDMA_XFER_ERROR (0x01U) +#define NVSCIC2C_PCIE_AER_UNCORRECTABLE_FATAL (0x02U) +#define NVSCIC2C_PCIE_AER_UNCORRECTABLE_NONFATAL (0x04U) + /* Link status between the two peers - encapsulates PCIe link also.*/ enum nvscic2c_pcie_link { NVSCIC2C_PCIE_LINK_DOWN = 0, NVSCIC2C_PCIE_LINK_UP, }; +/* Represents layout of link status memory. */ +struct nvscic2c_pcie_link_mem { + enum nvscic2c_pcie_link link_status; + __u32 aer_err; +}; + /** * stream extensions - object type. */ @@ -70,6 +82,7 @@ struct nvscic2c_pcie_endpoint_info { struct nvscic2c_pcie_endpoint_mem_info peer; struct nvscic2c_pcie_endpoint_mem_info self; struct nvscic2c_pcie_endpoint_mem_info link; + struct nvscic2c_pcie_endpoint_mem_info edma_err; }; /**