/* * SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include "nv-dmabuf.h" #if defined(CONFIG_DMA_SHARED_BUFFER) typedef struct nv_dma_buf_mem_handle { // Memory handle, offset and size NvHandle h_memory; NvU64 offset; NvU64 size; // RM memdesc specific data void *mem_info; // // Refcount for phys addresses // If refcount > 0, phys address ranges in memArea are reused. // NvU64 phys_refcount; // Scatterlist of all the memory ranges associated with the buf MemoryArea memArea; } nv_dma_buf_mem_handle_t; typedef struct nv_dma_buf_file_private { // GPU device state nv_state_t *nv; // Client, device, subdevice handles NvHandle h_client; NvHandle h_device; NvHandle h_subdevice; // Total number of handles supposed to be attached to this dma-buf NvU32 total_objects; // // Number of handles actually attached to this dma-buf. // This should equal total_objects, or map fails. // NvU32 num_objects; // Total size of all handles supposed to be attached to this dma-buf NvU64 total_size; // // Size of all handles actually attached to the dma-buf // If all handles are attached, total_size and attached_size must match. // NvU64 attached_size; // Mutex to lock priv state during dma-buf callbacks struct mutex lock; // Handle info: see nv_dma_buf_mem_handle_t nv_dma_buf_mem_handle_t *handles; // RM-private info for MIG configs void *mig_info; // // Flag to indicate if phys addresses are static and can be // fetched during dma-buf create/reuse instead of in map. // NvBool static_phys_addrs; // // Type of mapping requested, one of: // NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT // NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE // NvU8 mapping_type; // // On some coherent platforms requesting mapping_type FORCE_PCIE, // peer-to-peer is expected to bypass the IOMMU due to hardware // limitations. On such systems, IOMMU map/unmap will be skipped. // NvBool skip_iommu; struct { // True if the map attributes are cached NvBool cached; // Flag to indicate if dma-buf mmap is allowed NvBool can_mmap; // // Flag to indicate if client/user is allowed dma-buf mmap or not. // That way user can enable mmap for testing/specific // use cases and not for any all handles. // NvU64 allow_mmap; // RM-private info for cache type settings (cached/uncached/writecombined). NvU32 cache_type; // Flag to indicate if dma-buf is RO or RW memory. NvBool read_only_mem; // Memory type info: see nv_memory_type_t. 
nv_memory_type_t memory_type; } map_attrs; // // Flag to indicate if all GPU locks to be acquired/released before/after calling // rm_dma_buf_dup_mem_handle(). // nv_dma_buf_dup_mem_handles() acquires GPU lock only for calling pGPU // instance. However, it is not sufficient as per DupObject() SYSMEM's design // since it expects either all GPU locks to be acquired by the caller or // do not take any GPU locks. This flag is set to TRUE only for // ZERO_FB chips. // NvBool acquire_release_all_gpu_lock_on_dup; } nv_dma_buf_file_private_t; static void nv_dma_buf_free_file_private( nv_dma_buf_file_private_t *priv ) { if (priv == NULL) { return; } if (priv->handles != NULL) { os_free_mem(priv->handles); priv->handles = NULL; } mutex_destroy(&priv->lock); NV_KFREE(priv, sizeof(nv_dma_buf_file_private_t)); } static nv_dma_buf_file_private_t* nv_dma_buf_alloc_file_private( NvU32 num_handles ) { nv_dma_buf_file_private_t *priv = NULL; NvU64 handles_size = num_handles * sizeof(priv->handles[0]); NV_STATUS status; NV_KZALLOC(priv, sizeof(nv_dma_buf_file_private_t)); if (priv == NULL) { return NULL; } mutex_init(&priv->lock); status = os_alloc_mem((void **) &priv->handles, handles_size); if (status != NV_OK) { goto failed; } os_mem_set(priv->handles, 0, handles_size); return priv; failed: nv_dma_buf_free_file_private(priv); return NULL; } static void nv_reset_phys_refcount( nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 handle_count ) { NvU32 i; for (i = 0; i < handle_count; i++) { NvU32 index = start_index + i; priv->handles[index].phys_refcount = 0; } } static NvBool nv_dec_and_check_zero_phys_refcount( nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 handle_count ) { NvU32 i; NvBool is_zero = NV_FALSE; for (i = 0; i < handle_count; i++) { NvU32 index = start_index + i; priv->handles[index].phys_refcount--; if (priv->handles[index].phys_refcount == 0) { is_zero = NV_TRUE; } } return is_zero; } static NvBool nv_inc_and_check_one_phys_refcount( nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 handle_count ) { NvU32 i; NvBool is_one = NV_FALSE; for (i = 0; i < handle_count; i++) { NvU32 index = start_index + i; priv->handles[index].phys_refcount++; if (priv->handles[index].phys_refcount == 1) { is_one = NV_TRUE; } } return is_one; } // Must be called with RMAPI lock and GPU lock taken static void nv_dma_buf_undup_mem_handles_unlocked( nvidia_stack_t *sp, NvU32 start_index, NvU32 num_objects, nv_dma_buf_file_private_t *priv ) { NvU32 index, i; for (i = 0; i < num_objects; i++) { index = start_index + i; if (priv->handles[index].h_memory == 0) { continue; } rm_dma_buf_undup_mem_handle(sp, priv->nv, priv->h_client, priv->handles[index].h_memory); priv->attached_size -= priv->handles[index].size; priv->handles[index].h_memory = 0; priv->handles[index].offset = 0; priv->handles[index].size = 0; priv->num_objects--; } } static void nv_dma_buf_undup_mem_handles( nvidia_stack_t *sp, NvU32 index, NvU32 num_objects, nv_dma_buf_file_private_t *priv ) { NV_STATUS status; status = rm_acquire_api_lock(sp); if (WARN_ON(status != NV_OK)) { return; } status = rm_acquire_all_gpus_lock(sp); if (WARN_ON(status != NV_OK)) { goto unlock_api_lock; } nv_dma_buf_undup_mem_handles_unlocked(sp, index, num_objects, priv); rm_release_all_gpus_lock(sp); unlock_api_lock: rm_release_api_lock(sp); } // // TODO: Temporary work around for SYSMEM Dup issue. // Take all GPU locks before calling the DupObject(). 
// DupObject() requires the caller to either acquire all GPU locks beforehand or
// refrain from acquiring any GPU locks before invoking it.
// Otherwise, in multi-GPU use cases, DupObject() fails for the GPU instance that
// is already locked during nv_dma_buf_dup_mem_handles(), printing the error
// "GPU lock already acquired by this thread".
// In TOT, nv_dma_buf_dup_mem_handles() acquires the GPU lock only for the calling
// pGPU instance. However, that is not sufficient for DupObject()'s SYSMEM design,
// which expects the caller to hold either all GPU locks or none.
// PDB_PROP_GPU_ZERO_FB chips (iGPU) have no local memory, so SYSMEM is used for
// device resources. The priv->acquire_release_all_gpu_lock_on_dup flag is set to
// NV_TRUE only for PDB_PROP_GPU_ZERO_FB chips.
//
// Proper Fix (Bug 4866388):
// The RS_FLAGS_ACQUIRE_RELAXED_GPUS_LOCK_ON_DUP flag was introduced to allow an
// RM class to take the GPU Group Lock if the source and the destination object
// belong to the same pGpu, and all GPU locks otherwise.
// That change is currently causing test failures, so this temporary workaround
// is needed until the proper fix is added.
//
static inline NV_STATUS
nv_dma_buf_acquire_gpu_lock(
    nvidia_stack_t *sp,
    nv_dma_buf_file_private_t *priv
)
{
    return (priv->acquire_release_all_gpu_lock_on_dup ?
            rm_acquire_all_gpus_lock(sp) :
            rm_acquire_gpu_lock(sp, priv->nv));
}

static inline NV_STATUS
nv_dma_buf_release_gpu_lock(
    nvidia_stack_t *sp,
    nv_dma_buf_file_private_t *priv
)
{
    return (priv->acquire_release_all_gpu_lock_on_dup ?
            rm_release_all_gpus_lock(sp) :
            rm_release_gpu_lock(sp, priv->nv));
}

static NV_STATUS
nv_dma_buf_dup_mem_handles(
    nvidia_stack_t *sp,
    nv_dma_buf_file_private_t *priv,
    nv_ioctl_export_to_dma_buf_fd_t *params
)
{
    NV_STATUS status = NV_OK;
    NvU32 index = params->index;
    NvU32 count = 0;
    NvU32 i = 0;

    status = rm_acquire_api_lock(sp);
    if (status != NV_OK)
    {
        return status;
    }

    status = nv_dma_buf_acquire_gpu_lock(sp, priv);
    if (status != NV_OK)
    {
        goto unlock_api_lock;
    }

    for (i = 0; i < params->numObjects; i++)
    {
        NvHandle h_memory_duped = 0;
        void *mem_info = NULL;
        nv_memory_type_t memory_type = NV_MEMORY_TYPE_SYSTEM;
        NvBool can_mmap;
        NvU32 cache_type;
        NvBool read_only_mem;

        if (priv->handles[index].h_memory != 0)
        {
            status = NV_ERR_IN_USE;
            goto failed;
        }

        if (params->sizes[i] > priv->total_size - priv->attached_size)
        {
            status = NV_ERR_INVALID_ARGUMENT;
            goto failed;
        }

        status = rm_dma_buf_dup_mem_handle(sp, priv->nv,
                                           params->hClient,
                                           priv->h_client,
                                           priv->h_device,
                                           priv->h_subdevice,
                                           priv->mig_info,
                                           params->handles[i],
                                           params->offsets[i],
                                           params->sizes[i],
                                           &h_memory_duped,
                                           &mem_info,
                                           &can_mmap,
                                           &cache_type,
                                           &read_only_mem,
                                           &memory_type);
        if (status != NV_OK)
        {
            goto failed;
        }

        if (priv->map_attrs.cached)
        {
            if ((can_mmap != priv->map_attrs.can_mmap) ||
                (cache_type != priv->map_attrs.cache_type) ||
                (read_only_mem != priv->map_attrs.read_only_mem) ||
                (memory_type != priv->map_attrs.memory_type))
            {
                // Creating a mixed dma-buf is not supported.
                status = NV_ERR_INVALID_ARGUMENT;
                goto failed;
            }
        }
        else
        {
            // Store the handle's mmap, RO and cache type info.
priv->map_attrs.can_mmap = can_mmap; priv->map_attrs.cache_type = cache_type; priv->map_attrs.read_only_mem = read_only_mem; priv->map_attrs.memory_type = memory_type; priv->map_attrs.cached = NV_TRUE; } priv->attached_size += params->sizes[i]; priv->handles[index].h_memory = h_memory_duped; priv->handles[index].offset = params->offsets[i]; priv->handles[index].size = params->sizes[i]; priv->handles[index].mem_info = mem_info; priv->num_objects++; index++; count++; } if ((priv->num_objects == priv->total_objects) && (priv->attached_size != priv->total_size)) { status = NV_ERR_INVALID_ARGUMENT; goto failed; } nv_dma_buf_release_gpu_lock(sp, priv); rm_release_api_lock(sp); return NV_OK; failed: nv_dma_buf_undup_mem_handles_unlocked(sp, params->index, count, priv); nv_dma_buf_release_gpu_lock(sp, priv); unlock_api_lock: rm_release_api_lock(sp); return status; } static void nv_put_phys_addresses( nvidia_stack_t *sp, nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 mapped_handle_count ) { NvU32 i; for (i = 0; i < mapped_handle_count; i++) { NvU32 index = start_index + i; if (priv->handles[index].phys_refcount > 0) { continue; } // Per-handle memArea is freed by RM rm_dma_buf_unmap_mem_handle(sp, priv->nv, priv->h_client, priv->handles[index].h_memory, priv->mapping_type, priv->handles[index].mem_info, priv->static_phys_addrs, priv->handles[index].memArea); priv->handles[index].memArea.numRanges = 0; } } static void nv_dma_buf_put_phys_addresses ( nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 handle_count ) { NV_STATUS status; nvidia_stack_t *sp = NULL; NvBool api_lock_taken = NV_FALSE; NvBool gpu_lock_taken = NV_FALSE; int rc = 0; if (!nv_dec_and_check_zero_phys_refcount(priv, start_index, handle_count)) { return; } rc = nv_kmem_cache_alloc_stack(&sp); if (WARN_ON(rc != 0)) { return; } if (!priv->static_phys_addrs) { status = rm_acquire_api_lock(sp); if (WARN_ON(status != NV_OK)) { goto free_sp; } api_lock_taken = NV_TRUE; status = rm_acquire_gpu_lock(sp, priv->nv); if (WARN_ON(status != NV_OK)) { goto unlock_api_lock; } gpu_lock_taken = NV_TRUE; } nv_put_phys_addresses(sp, priv, start_index, handle_count); if (gpu_lock_taken) { rm_release_gpu_lock(sp, priv->nv); } unlock_api_lock: if (api_lock_taken) { rm_release_api_lock(sp); } free_sp: nv_kmem_cache_free_stack(sp); } static NV_STATUS nv_dma_buf_get_phys_addresses ( nv_dma_buf_file_private_t *priv, NvU32 start_index, NvU32 handle_count ) { NV_STATUS status = NV_OK; nvidia_stack_t *sp = NULL; NvBool api_lock_taken = NV_FALSE; NvBool gpu_lock_taken = NV_FALSE; NvU32 i; int rc = 0; if (!nv_inc_and_check_one_phys_refcount(priv, start_index, handle_count)) { return NV_OK; } rc = nv_kmem_cache_alloc_stack(&sp); if (rc != 0) { status = NV_ERR_NO_MEMORY; goto failed; } // // Locking is not needed for static phys address configs because the memdesc // is not expected to change in this case and we hold the refcount on the // owner GPU and memory before referencing it. 
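    //
    // For reference, callers pair the get/put helpers so that the RM mapping is
    // created only on the first reference and torn down only on the last one.
    // A minimal sketch of the refcount lifecycle (illustrative only, not driver
    // code; "n" is an assumed handle count):
    //
    //     nv_dma_buf_get_phys_addresses(priv, 0, n);  // refcount 0 -> 1: RM map
    //     nv_dma_buf_get_phys_addresses(priv, 0, n);  // refcount 1 -> 2: addresses reused
    //     nv_dma_buf_put_phys_addresses(priv, 0, n);  // refcount 2 -> 1: no unmap
    //     nv_dma_buf_put_phys_addresses(priv, 0, n);  // refcount 1 -> 0: RM unmap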
// if (!priv->static_phys_addrs) { status = rm_acquire_api_lock(sp); if (status != NV_OK) { goto free_sp; } api_lock_taken = NV_TRUE; status = rm_acquire_gpu_lock(sp, priv->nv); if (status != NV_OK) { goto unlock_api_lock; } gpu_lock_taken = NV_TRUE; } for (i = 0; i < handle_count; i++) { NvU32 index = start_index + i; if (priv->handles[index].phys_refcount > 1) { continue; } // Per-handle memArea is allocated by RM status = rm_dma_buf_map_mem_handle(sp, priv->nv, priv->h_client, priv->handles[index].h_memory, mrangeMake(priv->handles[index].offset, priv->handles[index].size), priv->mapping_type, priv->handles[index].mem_info, priv->static_phys_addrs, &priv->handles[index].memArea); if (status != NV_OK) { goto unmap_handles; } } if (gpu_lock_taken) { rm_release_gpu_lock(sp, priv->nv); } if (api_lock_taken) { rm_release_api_lock(sp); } nv_kmem_cache_free_stack(sp); return NV_OK; unmap_handles: nv_put_phys_addresses(sp, priv, start_index, i); if (gpu_lock_taken) { rm_release_gpu_lock(sp, priv->nv); } unlock_api_lock: if (api_lock_taken) { rm_release_api_lock(sp); } free_sp: nv_kmem_cache_free_stack(sp); failed: nv_reset_phys_refcount(priv, start_index, handle_count); return status; } static inline unsigned long nv_dma_buf_get_dev_attrs( struct device *dev ) { unsigned long dev_attrs = 0; #if defined(NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT) dev_attrs |= DMA_ATTR_SKIP_CPU_SYNC; #endif // defined(NV_DMA_ATTR_SKIP_CPU_SYNC_PRESENT) return dev_attrs; } static void nv_dma_buf_unmap_pages( struct device *dev, struct sg_table *sgt, nv_dma_buf_file_private_t *priv ) { if (priv->skip_iommu) { return; } dma_unmap_sg_attrs(dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL, nv_dma_buf_get_dev_attrs(dev)); } static void nv_dma_buf_unmap_pfns( struct device *dev, struct sg_table *sgt, nv_dma_buf_file_private_t *priv ) { nv_dma_device_t peer_dma_dev = {{ 0 }}; struct scatterlist *sg = sgt->sgl; NvU32 i; if (priv->skip_iommu) { return; } peer_dma_dev.dev = dev; peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask; for_each_sg(sgt->sgl, sg, sgt->nents, i) { nv_dma_unmap_peer(&peer_dma_dev, (sg_dma_len(sg) >> PAGE_SHIFT), sg_dma_address(sg)); } } static NvU32 nv_dma_buf_get_sg_count ( struct device *dev, nv_dma_buf_file_private_t *priv, NvU32 *max_seg_size ) { NvU32 dma_max_seg_size, i; NvU32 nents = 0; dma_max_seg_size = NV_ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE); if (dma_max_seg_size < PAGE_SIZE) { return 0; } // Calculate nents needed to allocate sg_table for (i = 0; i < priv->num_objects; i++) { NvU32 range_count = priv->handles[i].memArea.numRanges; NvU32 index; for (index = 0; index < range_count; index++) { NvU64 length = priv->handles[i].memArea.pRanges[index].size; NvU64 count = length + dma_max_seg_size - 1; do_div(count, dma_max_seg_size); nents += count; } } *max_seg_size = dma_max_seg_size; return nents; } static struct sg_table* nv_dma_buf_map_pages ( struct device *dev, nv_dma_buf_file_private_t *priv ) { struct sg_table *sgt = NULL; struct scatterlist *sg; NvU32 dma_max_seg_size = 0; NvU32 i, nents; int rc; nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size); NV_KZALLOC(sgt, sizeof(struct sg_table)); if (sgt == NULL) { return NULL; } rc = sg_alloc_table(sgt, nents, GFP_KERNEL); if (rc != 0) { goto free_sgt; } sg = sgt->sgl; for (i = 0; i < priv->num_objects; i++) { NvU32 range_count = priv->handles[i].memArea.numRanges; NvU32 index = 0; for (index = 0; index < range_count; index++) { NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start; NvU64 dma_len = 
priv->handles[i].memArea.pRanges[index].size; // Split each range into dma_max_seg_size chunks while(dma_len != 0) { NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size); struct page *page = NV_GET_PAGE_STRUCT(dma_addr); if ((page == NULL) || (sg == NULL)) { goto free_table; } sg_set_page(sg, page, sg_len, NV_GET_OFFSET_IN_PAGE(dma_addr)); dma_addr += sg_len; dma_len -= sg_len; sg = sg_next(sg); } } } WARN_ON(sg != NULL); // DMA map the sg_table rc = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, DMA_BIDIRECTIONAL, nv_dma_buf_get_dev_attrs(dev)); if (rc <= 0) { goto free_table; } sgt->nents = rc; return sgt; free_table: sg_free_table(sgt); free_sgt: NV_KFREE(sgt, sizeof(struct sg_table)); return NULL; } static struct sg_table* nv_dma_buf_map_pfns ( struct device *dev, nv_dma_buf_file_private_t *priv ) { NV_STATUS status; struct sg_table *sgt = NULL; struct scatterlist *sg; nv_dma_device_t peer_dma_dev = {{ 0 }}; NvU32 dma_max_seg_size = 0; NvU32 mapped_nents = 0; NvU32 i = 0; NvU32 nents; int rc = 0; peer_dma_dev.dev = dev; peer_dma_dev.addressable_range.limit = (NvU64)dev->dma_mask; nents = nv_dma_buf_get_sg_count(dev, priv, &dma_max_seg_size); NV_KZALLOC(sgt, sizeof(struct sg_table)); if (sgt == NULL) { return NULL; } rc = sg_alloc_table(sgt, nents, GFP_KERNEL); if (rc != 0) { goto free_sgt; } sg = sgt->sgl; for (i = 0; i < priv->num_objects; i++) { NvU32 range_count = priv->handles[i].memArea.numRanges; NvU32 index = 0; for (index = 0; index < range_count; index++) { NvU64 dma_addr = priv->handles[i].memArea.pRanges[index].start; NvU64 dma_len = priv->handles[i].memArea.pRanges[index].size; // Break the scatterlist into dma_max_seg_size chunks while(dma_len != 0) { NvU32 sg_len = NV_MIN(dma_len, dma_max_seg_size); if (sg == NULL) { goto unmap_pfns; } if (!priv->skip_iommu) { if (priv->nv->coherent) { status = nv_dma_map_non_pci_peer(&peer_dma_dev, (sg_len >> PAGE_SHIFT), &dma_addr); } else { status = nv_dma_map_peer(&peer_dma_dev, priv->nv->dma_dev, 0x1, (sg_len >> PAGE_SHIFT), &dma_addr); } if (status != NV_OK) { goto unmap_pfns; } } sg_set_page(sg, NULL, sg_len, 0); sg_dma_address(sg) = (dma_addr_t) dma_addr; sg_dma_len(sg) = sg_len; dma_addr += sg_len; dma_len -= sg_len; mapped_nents++; sg = sg_next(sg); } } } WARN_ON(sg != NULL); sgt->nents = mapped_nents; WARN_ON(sgt->nents != sgt->orig_nents); return sgt; unmap_pfns: sgt->nents = mapped_nents; nv_dma_buf_unmap_pfns(dev, sgt, priv); sg_free_table(sgt); free_sgt: NV_KFREE(sgt, sizeof(struct sg_table)); return NULL; } static int nv_dma_buf_attach( struct dma_buf *buf, #if defined(NV_DMA_BUF_OPS_ATTACH_ARG2_DEV) struct device *dev, #endif struct dma_buf_attachment *attachment ) { int rc = 0; nv_dma_buf_file_private_t *priv = buf->priv; mutex_lock(&priv->lock); if (priv->mapping_type == NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE) { if(!nv_pci_is_valid_topology_for_direct_pci(priv->nv, to_pci_dev(attachment->dev))) { nv_printf(NV_DBG_ERRORS, "NVRM: dma-buf attach failed: " "topology not supported for mapping type FORCE_PCIE\n"); rc = -ENOTSUPP; goto unlock_priv; } priv->skip_iommu = NV_TRUE; } else { nv_dma_device_t peer_dma_dev = {{ 0 }}; peer_dma_dev.dev = &to_pci_dev(attachment->dev)->dev; peer_dma_dev.addressable_range.limit = to_pci_dev(attachment->dev)->dma_mask; if (!nv_grdma_pci_topology_supported(priv->nv, &peer_dma_dev)) { nv_printf(NV_DBG_ERRORS, "NVRM: dma-buf attach failed: " "PCI topology not supported for dma-buf\n"); rc = -ENOTSUPP; goto unlock_priv; } } #if defined(NV_DMA_BUF_ATTACHMENT_HAS_PEER2PEER) if 
((attachment->importer_ops != NULL) && (!attachment->peer2peer) && (!priv->nv->mem_has_struct_page)) { nv_printf(NV_DBG_ERRORS, "NVRM: dma-buf attach failed: " "importer unable to handle MMIO without struct page\n"); rc = -ENOTSUPP; goto unlock_priv; } #endif unlock_priv: mutex_unlock(&priv->lock); return rc; } static struct sg_table* nv_dma_buf_map( struct dma_buf_attachment *attachment, enum dma_data_direction direction ) { NV_STATUS status; struct sg_table *sgt = NULL; struct dma_buf *buf = attachment->dmabuf; nv_dma_buf_file_private_t *priv = buf->priv; mutex_lock(&priv->lock); if (priv->num_objects != priv->total_objects) { goto unlock_priv; } if (!priv->static_phys_addrs) { status = nv_dma_buf_get_phys_addresses(priv, 0, priv->num_objects); if (status != NV_OK) { goto unlock_priv; } } // // For MAPPING_TYPE_FORCE_PCIE on coherent platforms, // get the BAR1 PFN scatterlist instead of C2C pages. // if (priv->nv->mem_has_struct_page && (priv->mapping_type == NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT)) { sgt = nv_dma_buf_map_pages(attachment->dev, priv); } else { sgt = nv_dma_buf_map_pfns(attachment->dev, priv); } if (sgt == NULL) { goto unmap_handles; } mutex_unlock(&priv->lock); return sgt; unmap_handles: if (!priv->static_phys_addrs) { nv_dma_buf_put_phys_addresses(priv, 0, priv->num_objects); } unlock_priv: mutex_unlock(&priv->lock); return NULL; } static void nv_dma_buf_unmap( struct dma_buf_attachment *attachment, struct sg_table *sgt, enum dma_data_direction direction ) { struct dma_buf *buf = attachment->dmabuf; nv_dma_buf_file_private_t *priv = buf->priv; mutex_lock(&priv->lock); if (priv->nv->mem_has_struct_page && (priv->mapping_type == NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT)) { nv_dma_buf_unmap_pages(attachment->dev, sgt, priv); } else { nv_dma_buf_unmap_pfns(attachment->dev, sgt, priv); } // // For static_phys_addrs platforms, this operation is done in release // since getting the phys_addrs was done in create/reuse. 
// if (!priv->static_phys_addrs) { nv_dma_buf_put_phys_addresses(priv, 0, priv->num_objects); } sg_free_table(sgt); NV_KFREE(sgt, sizeof(struct sg_table)); mutex_unlock(&priv->lock); } static void nv_dma_buf_release( struct dma_buf *buf ) { int rc = 0; NvU32 i; nvidia_stack_t *sp = NULL; nv_dma_buf_file_private_t *priv = buf->priv; nv_state_t *nv; if (priv == NULL) { return; } nv = priv->nv; if (priv->static_phys_addrs) { nv_dma_buf_put_phys_addresses(priv, 0, priv->num_objects); } rc = nv_kmem_cache_alloc_stack(&sp); if (WARN_ON(rc != 0)) { return; } // phys_addr refcounts must be zero at this point for (i = 0; i < priv->num_objects; i++) { WARN_ON(priv->handles[i].phys_refcount > 0); } nv_dma_buf_undup_mem_handles(sp, 0, priv->num_objects, priv); rm_dma_buf_put_client_and_device(sp, priv->nv, priv->h_client, priv->h_device, priv->h_subdevice, priv->mig_info); WARN_ON(priv->attached_size > 0); WARN_ON(priv->num_objects > 0); nv_dma_buf_free_file_private(priv); buf->priv = NULL; nvidia_dev_put(nv->gpu_id, sp); nv_kmem_cache_free_stack(sp); return; } static int nv_dma_buf_mmap( struct dma_buf *buf, struct vm_area_struct *vma ) { int ret = 0; NvU32 i = 0; nv_dma_buf_file_private_t *priv = buf->priv; unsigned long addr = vma->vm_start; NvU32 total_skip_size = 0; NvU64 total_map_len = NV_VMA_SIZE(vma); NvU64 off_in_range_array = 0; NvU32 index; if (priv == NULL) { nv_printf(NV_DBG_ERRORS, "NVRM: nv_dma_buf_mmap: priv == NULL.\n"); return -EINVAL; } mutex_lock(&priv->lock); if (!priv->map_attrs.can_mmap) { nv_printf(NV_DBG_ERRORS, "NVRM: nv_dma_buf_mmap: mmap is not allowed can_mmap[%d] \n", priv->map_attrs.can_mmap); ret = -ENOTSUPP; goto unlock_priv; } // Check for offset overflow. if ((NV_VMA_OFFSET(vma) + NV_VMA_SIZE(vma)) < NV_VMA_OFFSET(vma)) { ret = -EOVERFLOW; goto unlock_priv; } if ((NV_VMA_OFFSET(vma) + NV_VMA_SIZE(vma)) > priv->total_size) { nv_printf(NV_DBG_ERRORS, "NVRM: nv_dma_buf_mmap: Vaddr_start[%llx] Vaddr_end[%llx] " "vm_pgoff[%llx] page_offset[%llx] " "page_prot[%x] total_size[%llx] \n", vma->vm_start, vma->vm_end, NV_VMA_PGOFF(vma), NV_VMA_OFFSET(vma), pgprot_val(vma->vm_page_prot), priv->total_size); ret = -EINVAL; goto unlock_priv; } nv_printf(NV_DBG_INFO, "NVRM: nv_dma_buf_mmap: Vaddr_start[%llx] Vaddr_end[%llx] " "os_page_size[%llx] vm_pgoff[%llx] page_offset[%llx] " "page_prot[%x] total_size[%llx] total_map_len[%llx] \n", vma->vm_start, vma->vm_end, PAGE_SIZE, NV_VMA_PGOFF(vma), NV_VMA_OFFSET(vma), pgprot_val(vma->vm_page_prot), priv->total_size, total_map_len); // Find the first range from which map should start. for (i = 0; i < priv->num_objects; i++) { NvU32 range_count = priv->handles[i].memArea.numRanges; for (index = 0; index < range_count; index++) { NvU64 len = priv->handles[i].memArea.pRanges[index].size; total_skip_size += len; // // Skip memArea.pRanges[index] until to find out the // first mapping page start in the memArea range_count. // skip pages which lie outside of offset/map length. // if (NV_VMA_OFFSET(vma) >= total_skip_size) { continue; } total_skip_size -= len; // // First mapping page start can be anywhere in the specific // memArea.pRanges[index]. So adjust off_in_range_array accordingly. // off_in_range_array = (NV_VMA_OFFSET(vma) - total_skip_size); total_skip_size += off_in_range_array; goto found_start_page; } } // Could not find first map page. 
nv_printf(NV_DBG_ERRORS, "NVRM: [nv_dma_buf_mmap-failed] Could not find first map page \n"); ret = -EINVAL; goto unlock_priv; found_start_page: // RO and cache type settings if (nv_encode_caching(&vma->vm_page_prot, priv->map_attrs.cache_type, priv->map_attrs.memory_type)) { nv_printf(NV_DBG_ERRORS, "NVRM: [nv_dma_buf_mmap-failed] i[%u] cache_type[%llx] memory_type[%d] page_prot[%x] \n", i, priv->map_attrs.cache_type, priv->map_attrs.memory_type, pgprot_val(vma->vm_page_prot)); ret = -ENXIO; goto unlock_priv; } if (priv->map_attrs.read_only_mem) { vma->vm_page_prot = NV_PGPROT_READ_ONLY(vma->vm_page_prot); nv_vm_flags_clear(vma, VM_WRITE); nv_vm_flags_clear(vma, VM_MAYWRITE); } nv_vm_flags_set(vma, VM_SHARED | VM_DONTEXPAND | VM_DONTDUMP); // Create user mapping for (; i < (priv->num_objects && (addr < vma->vm_end)); i++) { NvU32 range_count = priv->handles[i].memArea.numRanges; for (; (index < range_count && (addr < vma->vm_end)); index++) { NvU64 len = priv->handles[i].memArea.pRanges[index].size; NvU64 map_len = 0; NvU64 phy_addr; phy_addr = (priv->handles[i].memArea.pRanges[index].start + off_in_range_array); len -= off_in_range_array; // Reset to 0, after its initial use. off_in_range_array = 0; map_len = NV_MIN(len, total_map_len); // // nv_remap_page_range() map a contiguous physical address space // into the user virtual space. // Use PFN based mapping api to create the mapping for // reserved carveout (OS invisible memory, not managed by OS) too. // Basically nv_remap_page_range() works for all kind of memory regions. // These are the downsides of using nv_remap_page_range() // 1. We can't use vm_insert_pages() batching API, so perf overhead to // map every page individually. // 2. We can't support use case to call pin_user_pages() on dma-buf's CPU VA. // We will revisit this code path in the future if needed. // ret = nv_remap_page_range(vma, addr, phy_addr, map_len, vma->vm_page_prot); if (ret) { nv_printf(NV_DBG_ERRORS, "NVRM: nv_dma_buf_mmap: remap_pfn_range - failed\n", ret); // Partial mapping is going to be freed by kernel if nv_dma_buf_mmap() fails. goto unlock_priv; } nv_printf(NV_DBG_INFO, "NVRM: nv_dma_buf_mmap: index[%u] range_count[%u] Vaddr[%llx] " "page_prot[%x] phyAddr[%llx] mapLen[%llx] len[%llx] " "total_map_len[%llx] \n", index, range_count, addr, pgprot_val(vma->vm_page_prot), phy_addr, map_len, len, total_map_len); total_map_len -= map_len; addr += map_len; } } mutex_unlock(&priv->lock); return 0; unlock_priv: mutex_unlock(&priv->lock); return ret; } #if defined(NV_DMA_BUF_OPS_HAS_MAP) static void* nv_dma_buf_map_stub( struct dma_buf *buf, unsigned long page_num ) { return NULL; } static void nv_dma_buf_unmap_stub( struct dma_buf *buf, unsigned long page_num, void *addr ) { return; } #endif #if defined(NV_DMA_BUF_OPS_HAS_MAP_ATOMIC) static void* nv_dma_buf_map_atomic_stub( struct dma_buf *buf, unsigned long page_num ) { return NULL; } static void nv_dma_buf_unmap_atomic_stub( struct dma_buf *buf, unsigned long page_num, void *addr ) { return; } #endif // // Note: Some of the dma-buf operations are mandatory in some kernels. // So stubs are added to prevent dma_buf_export() failure. // The actual implementations of these interfaces is not really required // for the export operation to work. 
// static const struct dma_buf_ops nv_dma_buf_ops = { .attach = nv_dma_buf_attach, .map_dma_buf = nv_dma_buf_map, .unmap_dma_buf = nv_dma_buf_unmap, .release = nv_dma_buf_release, .mmap = nv_dma_buf_mmap, #if defined(NV_DMA_BUF_OPS_HAS_MAP) .map = nv_dma_buf_map_stub, .unmap = nv_dma_buf_unmap_stub, #endif #if defined(NV_DMA_BUF_OPS_HAS_MAP_ATOMIC) .map_atomic = nv_dma_buf_map_atomic_stub, .unmap_atomic = nv_dma_buf_unmap_atomic_stub, #endif }; static NV_STATUS nv_dma_buf_create( nv_state_t *nv, nv_ioctl_export_to_dma_buf_fd_t *params ) { int rc = 0; NV_STATUS status; nvidia_stack_t *sp = NULL; struct dma_buf *buf = NULL; nv_dma_buf_file_private_t *priv = NULL; NvU32 gpu_id = nv->gpu_id; if (!nv->dma_buf_supported) { return NV_ERR_NOT_SUPPORTED; } if (params->index > (params->totalObjects - params->numObjects)) { return NV_ERR_INVALID_ARGUMENT; } priv = nv_dma_buf_alloc_file_private(params->totalObjects); if (priv == NULL) { nv_printf(NV_DBG_ERRORS, "NVRM: failed to allocate dma-buf private\n"); return NV_ERR_NO_MEMORY; } priv->total_objects = params->totalObjects; priv->total_size = params->totalSize; priv->nv = nv; priv->mapping_type = params->mappingType; priv->skip_iommu = NV_FALSE; priv->map_attrs.allow_mmap = params->bAllowMmap; rc = nv_kmem_cache_alloc_stack(&sp); if (rc != 0) { status = NV_ERR_NO_MEMORY; goto cleanup_priv; } rc = nvidia_dev_get(gpu_id, sp); if (rc != 0) { status = NV_ERR_OPERATING_SYSTEM; goto cleanup_sp; } status = rm_dma_buf_get_client_and_device(sp, priv->nv, params->hClient, params->handles[0], priv->mapping_type, &priv->h_client, &priv->h_device, &priv->h_subdevice, &priv->mig_info, &priv->static_phys_addrs, &priv->acquire_release_all_gpu_lock_on_dup); if (status != NV_OK) { goto cleanup_device; } status = nv_dma_buf_dup_mem_handles(sp, priv, params); if (status != NV_OK) { goto cleanup_client_and_device; } if (priv->map_attrs.allow_mmap && !priv->map_attrs.can_mmap) { nv_printf(NV_DBG_ERRORS, "NVRM: mmap is not allowed for the specific handles\n"); status = NV_ERR_NOT_SUPPORTED; goto cleanup_handles; } // User can enable mmap for testing/specific use cases and not for any all handles. if (!priv->map_attrs.allow_mmap) { priv->map_attrs.can_mmap = NV_FALSE; } // Get CPU static phys addresses if possible to do so at this time. if (priv->static_phys_addrs) { status = nv_dma_buf_get_phys_addresses(priv, params->index, params->numObjects); if (status != NV_OK) { goto cleanup_handles; } } { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); exp_info.ops = &nv_dma_buf_ops; exp_info.size = params->totalSize; exp_info.flags = O_RDWR | O_CLOEXEC; exp_info.priv = priv; exp_info.exp_name = "nv_dmabuf"; buf = dma_buf_export(&exp_info); } if (IS_ERR(buf)) { nv_printf(NV_DBG_ERRORS, "NVRM: failed to create dma-buf\n"); status = NV_ERR_OPERATING_SYSTEM; goto put_phys_addrs; } nv_kmem_cache_free_stack(sp); rc = dma_buf_fd(buf, O_RDWR | O_CLOEXEC); if (rc < 0) { nv_printf(NV_DBG_ERRORS, "NVRM: failed to get dma-buf file descriptor\n"); // // If dma-buf is successfully created, the dup'd handles // clean-up should be done by the release callback. 
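        // Dropping the reference below therefore invokes nv_dma_buf_release(),
        // which undups the attached handles and frees priv.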
// dma_buf_put(buf); return NV_ERR_OPERATING_SYSTEM; } params->fd = rc; return NV_OK; put_phys_addrs: if (priv->static_phys_addrs) { nv_dma_buf_put_phys_addresses(priv, params->index, params->numObjects); } cleanup_handles: nv_dma_buf_undup_mem_handles(sp, params->index, params->numObjects, priv); cleanup_client_and_device: rm_dma_buf_put_client_and_device(sp, priv->nv, priv->h_client, priv->h_device, priv->h_subdevice, priv->mig_info); cleanup_device: nvidia_dev_put(gpu_id, sp); cleanup_sp: nv_kmem_cache_free_stack(sp); cleanup_priv: nv_dma_buf_free_file_private(priv); return status; } static NV_STATUS nv_dma_buf_reuse( nv_state_t *nv, nv_ioctl_export_to_dma_buf_fd_t *params ) { int rc = 0; NV_STATUS status = NV_OK; nvidia_stack_t *sp = NULL; struct dma_buf *buf = NULL; nv_dma_buf_file_private_t *priv = NULL; buf = dma_buf_get(params->fd); if (IS_ERR(buf)) { nv_printf(NV_DBG_ERRORS, "NVRM: failed to get dma-buf\n"); return NV_ERR_OPERATING_SYSTEM; } if (buf->ops != &nv_dma_buf_ops) { nv_printf(NV_DBG_ERRORS, "NVRM: Invalid dma-buf fd\n"); status = NV_ERR_INVALID_ARGUMENT; goto cleanup_dmabuf; } priv = buf->priv; if (priv == NULL) { status = NV_ERR_OPERATING_SYSTEM; goto cleanup_dmabuf; } rc = mutex_lock_interruptible(&priv->lock); if (rc != 0) { status = NV_ERR_OPERATING_SYSTEM; goto cleanup_dmabuf; } if ((priv->total_objects < params->numObjects) || (params->index > (priv->total_objects - params->numObjects)) || (params->mappingType != priv->mapping_type) || (params->bAllowMmap != priv->map_attrs.allow_mmap)) { status = NV_ERR_INVALID_ARGUMENT; goto unlock_priv; } rc = nv_kmem_cache_alloc_stack(&sp); if (rc != 0) { status = NV_ERR_NO_MEMORY; goto unlock_priv; } status = nv_dma_buf_dup_mem_handles(sp, priv, params); if (status != NV_OK) { goto cleanup_sp; } // Get CPU static phys addresses if possible to do so at this time. if (priv->static_phys_addrs) { status = nv_dma_buf_get_phys_addresses(priv, params->index, params->numObjects); if (status != NV_OK) { goto cleanup_handles; } } nv_kmem_cache_free_stack(sp); mutex_unlock(&priv->lock); dma_buf_put(buf); return NV_OK; cleanup_handles: nv_dma_buf_undup_mem_handles(sp, params->index, params->numObjects, priv); cleanup_sp: nv_kmem_cache_free_stack(sp); unlock_priv: mutex_unlock(&priv->lock); cleanup_dmabuf: dma_buf_put(buf); return status; } #endif // CONFIG_DMA_SHARED_BUFFER NV_STATUS nv_dma_buf_export( nv_state_t *nv, nv_ioctl_export_to_dma_buf_fd_t *params ) { #if defined(CONFIG_DMA_SHARED_BUFFER) NV_STATUS status; if ((params == NULL) || (params->totalSize == 0) || (params->numObjects == 0) || (params->totalObjects == 0) || (params->numObjects > NV_DMABUF_EXPORT_MAX_HANDLES) || (params->numObjects > params->totalObjects)) { return NV_ERR_INVALID_ARGUMENT; } if ((params->mappingType != NV_DMABUF_EXPORT_MAPPING_TYPE_DEFAULT) && (params->mappingType != NV_DMABUF_EXPORT_MAPPING_TYPE_FORCE_PCIE)) { return NV_ERR_INVALID_ARGUMENT; } // // If fd >= 0, dma-buf already exists with this fd, so get dma-buf from fd. // If fd == -1, dma-buf is not created yet, so create it and then store // additional handles. 
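    //
    // Illustrative two-call sequence when more handles must be attached than a
    // single call provides (field values below are assumptions for the example):
    //   call 1: fd = -1, totalObjects = 8, index = 0, numObjects = 4
    //           -> creates the dma-buf, dups handles 0..3, returns the new fd in params->fd
    //   call 2: fd = <fd from call 1>, index = 4, numObjects = 4
    //           -> dups handles 4..7 into the existing dma-buf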
// if (params->fd == -1) { status = nv_dma_buf_create(nv, params); } else if (params->fd >= 0) { status = nv_dma_buf_reuse(nv, params); } else { status = NV_ERR_INVALID_ARGUMENT; } return status; #else return NV_ERR_NOT_SUPPORTED; #endif // CONFIG_DMA_SHARED_BUFFER } NV_STATUS NV_API_CALL nv_dma_import_dma_buf ( nv_dma_device_t *dma_dev, struct dma_buf *dma_buf, NvBool is_ro_device_map, NvU32 *size, struct sg_table **sgt, nv_dma_buf_t **import_priv ) { #if defined(CONFIG_DMA_SHARED_BUFFER) nv_dma_buf_t *nv_dma_buf = NULL; struct dma_buf_attachment *dma_attach = NULL; struct sg_table *map_sgt = NULL; NV_STATUS status = NV_OK; if ((dma_dev == NULL) || (dma_buf == NULL) || (size == NULL) || (sgt == NULL) || (import_priv == NULL)) { nv_printf(NV_DBG_ERRORS, "Import arguments are NULL!\n"); return NV_ERR_INVALID_ARGUMENT; } status = os_alloc_mem((void **)&nv_dma_buf, sizeof(*nv_dma_buf)); if (status != NV_OK) { nv_printf(NV_DBG_ERRORS, "Can't allocate mem for nv_buf!\n"); return status; } get_dma_buf(dma_buf); dma_attach = dma_buf_attach(dma_buf, dma_dev->dev); if (IS_ERR_OR_NULL(dma_attach)) { nv_printf(NV_DBG_ERRORS, "Can't attach dma_buf!\n"); status = NV_ERR_OPERATING_SYSTEM; goto dma_buf_attach_fail; } if (is_ro_device_map) { // Try RO only dma mapping. nv_dma_buf->direction = DMA_TO_DEVICE; nv_printf(NV_DBG_INFO, "NVRM: nv_dma_import_dma_buf -Try RO [DMA_TO_DEVICE] only mapping \n"); } else { nv_dma_buf->direction = DMA_BIDIRECTIONAL; } map_sgt = dma_buf_map_attachment(dma_attach, nv_dma_buf->direction); if (IS_ERR_OR_NULL(map_sgt)) { nv_printf(NV_DBG_ERRORS, "Can't map dma attachment!\n"); status = NV_ERR_OPERATING_SYSTEM; goto dma_buf_map_fail; } nv_dma_buf->dma_buf = dma_buf; nv_dma_buf->dma_attach = dma_attach; nv_dma_buf->sgt = map_sgt; *size = dma_buf->size; *import_priv = nv_dma_buf; *sgt = map_sgt; return NV_OK; dma_buf_map_fail: dma_buf_detach(dma_buf, dma_attach); dma_buf_attach_fail: os_free_mem(nv_dma_buf); dma_buf_put(dma_buf); return status; #else return NV_ERR_NOT_SUPPORTED; #endif // CONFIG_DMA_SHARED_BUFFER } NV_STATUS NV_API_CALL nv_dma_import_from_fd ( nv_dma_device_t *dma_dev, NvS32 fd, NvBool is_ro_device_map, NvU32 *size, struct sg_table **sgt, nv_dma_buf_t **import_priv ) { #if defined(CONFIG_DMA_SHARED_BUFFER) struct dma_buf *dma_buf = dma_buf_get(fd); NV_STATUS status; if (IS_ERR_OR_NULL(dma_buf)) { nv_printf(NV_DBG_ERRORS, "Can't get dma_buf from fd!\n"); return NV_ERR_OPERATING_SYSTEM; } status = nv_dma_import_dma_buf(dma_dev, dma_buf, is_ro_device_map, size, sgt, import_priv); dma_buf_put(dma_buf); return status; #else return NV_ERR_NOT_SUPPORTED; #endif // CONFIG_DMA_SHARED_BUFFER } void NV_API_CALL nv_dma_release_dma_buf ( nv_dma_buf_t *import_priv ) { #if defined(CONFIG_DMA_SHARED_BUFFER) nv_dma_buf_t *nv_dma_buf = NULL; if (import_priv == NULL) { return; } nv_dma_buf = (nv_dma_buf_t *)import_priv; dma_buf_unmap_attachment(nv_dma_buf->dma_attach, nv_dma_buf->sgt, nv_dma_buf->direction); dma_buf_detach(nv_dma_buf->dma_buf, nv_dma_buf->dma_attach); dma_buf_put(nv_dma_buf->dma_buf); os_free_mem(nv_dma_buf); #endif // CONFIG_DMA_SHARED_BUFFER }
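
/*
 * Example call sequence for the import helpers above (a minimal sketch; the
 * surrounding driver code, "dma_dev" and "fd" are assumptions and not part of
 * this file):
 *
 *     struct sg_table *sgt;
 *     nv_dma_buf_t *import_priv;
 *     NvU32 size;
 *
 *     if (nv_dma_import_from_fd(dma_dev, fd, NV_FALSE, &size,
 *                               &sgt, &import_priv) == NV_OK)
 *     {
 *         ... program the device using the entries in sgt ...
 *         nv_dma_release_dma_buf(import_priv);
 *     }
 */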