From d802ae96a70174bf8f33552501b81c267d204a7d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 15:51:24 +0200 Subject: [PATCH] drm/tegra: Implement correct DMA-BUF semantics DMA-BUF requires that each device that accesses a DMA-BUF attaches to it separately. To do so the host1x_bo_pin() and host1x_bo_unpin() functions need to be reimplemented so that they can return a mapping, which either represents an attachment or a map of the driver's own GEM object. Bug 200768479 Signed-off-by: Thierry Reding Change-Id: Ia380b7dcc371ce47f5f35d44a60fbd6b4ab9d636 Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2596398 (cherry picked from commit 28960586000fca025689edfd45645ab28e497bca) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2620137 Tested-by: Jonathan Hunter Tested-by: mobile promotions Reviewed-by: Jonathan Hunter Reviewed-by: svc_kernel_abi Reviewed-by: Mikko Perttunen Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit --- .../gpu/host1x/include/linux/host1x-next.h | 31 ++-- drivers/gpu/host1x/job.c | 161 +++++++----------- drivers/gpu/host1x/job.h | 6 +- 3 files changed, 80 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/host1x/include/linux/host1x-next.h b/drivers/gpu/host1x/include/linux/host1x-next.h index 7987d219..5198e54b 100644 --- a/drivers/gpu/host1x/include/linux/host1x-next.h +++ b/drivers/gpu/host1x/include/linux/host1x-next.h @@ -7,6 +7,7 @@ #define __LINUX_HOST1X_H #include +#include #include enum host1x_class { @@ -84,12 +85,24 @@ struct host1x_client { struct host1x_bo; struct sg_table; +struct host1x_bo_mapping { + struct dma_buf_attachment *attach; + enum dma_data_direction direction; + struct list_head list; + struct host1x_bo *bo; + struct sg_table *sgt; + unsigned int chunks; + struct device *dev; + dma_addr_t phys; + size_t size; +}; + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); - struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys); - void (*unpin)(struct device *dev, struct sg_table *sgt); + struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction direction); + void (*unpin)(struct host1x_bo_mapping *map); void *(*mmap)(struct host1x_bo *bo); void (*munmap)(struct host1x_bo *bo, void *addr); }; @@ -114,17 +127,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct sg_table *host1x_bo_pin(struct device *dev, - struct host1x_bo *bo, - dma_addr_t *phys) +static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir) { - return bo->ops->pin(dev, bo, phys); + return bo->ops->pin(dev, bo, dir); } -static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo, - struct sg_table *sgt) +static inline void host1x_bo_unpin(struct host1x_bo_mapping *map) { - bo->ops->unpin(dev, sgt); + map->bo->ops->unpin(map); } static inline void *host1x_bo_mmap(struct host1x_bo *bo) diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index ae484d86..3aa8854d 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -125,11 +125,11 @@ EXPORT_SYMBOL(host1x_job_add_wait); static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; struct host1x_client *client = job->client; struct device *dev = client->dev; struct host1x_job_gather *g; struct iommu_domain *domain; - struct sg_table *sgt; unsigned int i; int err; @@ -138,7 +138,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; - dma_addr_t phys_addr, *phys; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -146,64 +148,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - /* - * If the client device is not attached to an IOMMU, the - * physical address of the buffer object can be used. - * - * Similarly, when an IOMMU domain is shared between all - * host1x clients, the IOVA is already available, so no - * need to map the buffer object again. - * - * XXX Note that this isn't always safe to do because it - * relies on an assumption that no cache maintenance is - * needed on the buffer objects. - */ - if (!domain || client->group) - phys = &phys_addr; - else - phys = NULL; + bo = reloc->target.bo; - sgt = host1x_bo_pin(dev, reloc->target.bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; goto unpin; } - if (sgt) { - unsigned long mask = HOST1X_RELOC_READ | - HOST1X_RELOC_WRITE; - enum dma_data_direction dir; - - switch (reloc->flags & mask) { - case HOST1X_RELOC_READ: - dir = DMA_TO_DEVICE; - break; - - case HOST1X_RELOC_WRITE: - dir = DMA_FROM_DEVICE; - break; - - case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: - dir = DMA_BIDIRECTIONAL; - break; - - default: - err = -EINVAL; - goto unpin; - } - - err = dma_map_sgtable(dev, sgt, dir, 0); - if (err) - goto unpin; - - job->unpins[job->num_unpins].dev = dev; - job->unpins[job->num_unpins].dir = dir; - phys_addr = sg_dma_address(sgt->sgl); + map = host1x_bo_pin(dev, bo, direction); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; } - job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target.bo; - job->unpins[job->num_unpins].sgt = sgt; + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. + */ + if (map->chunks > 1) { + err = -EINVAL; + goto unpin; + } + + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; } @@ -215,12 +197,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) return 0; for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; size_t gather_size = 0; struct scatterlist *sg; - dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; - dma_addr_t *phys; unsigned int j; if (job->cmds[i].is_wait) @@ -234,25 +215,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - /** - * If the host1x is not attached to an IOMMU, there is no need - * to map the buffer object for the host1x, since the physical - * address can simply be used. - */ - if (!iommu_get_domain_for_dev(host->dev)) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(host->dev, g->bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto put; + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; } if (host->domain) { - for_each_sgtable_sg(sgt, sg, j) + for_each_sgtable_sg(map->sgt, sg, j) gather_size += sg->length; + gather_size = iova_align(&host->iova, gather_size); shift = iova_shift(&host->iova); @@ -263,33 +235,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto put; } - err = iommu_map_sgtable(host->domain, - iova_dma_addr(&host->iova, alloc), - sgt, IOMMU_READ); + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); if (err == 0) { __free_iova(&host->iova, alloc); err = -EINVAL; goto put; } - job->unpins[job->num_unpins].size = gather_size; - phys_addr = iova_dma_addr(&host->iova, alloc); - } else if (sgt) { - err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0); - if (err) - goto put; - - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; - job->unpins[job->num_unpins].dev = host->dev; - phys_addr = sg_dma_address(sgt->sgl); + map->phys = iova_dma_addr(&host->iova, alloc); + map->size = gather_size; } - job->addr_phys[job->num_unpins] = phys_addr; - job->gather_addr_phys[i] = phys_addr; - - job->unpins[job->num_unpins].bo = g->bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; } return 0; @@ -681,23 +643,16 @@ void host1x_job_unpin(struct host1x_job *job) unsigned int i; for (i = 0; i < job->num_unpins; i++) { - struct host1x_job_unpin_data *unpin = &job->unpins[i]; - struct device *dev = unpin->dev ?: host->dev; - struct sg_table *sgt = unpin->sgt; + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && - unpin->size && host->domain) { - iommu_unmap(host->domain, job->addr_phys[i], - unpin->size); - free_iova(&host->iova, - iova_pfn(&host->iova, job->addr_phys[i])); + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); } - if (unpin->dev && sgt) - dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0); - - host1x_bo_unpin(dev, unpin->bo, sgt); - host1x_bo_put(unpin->bo); + host1x_bo_unpin(map); + host1x_bo_put(bo); } job->num_unpins = 0; diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 33adfaed..657e2400 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -33,11 +33,7 @@ struct host1x_job_cmd { }; struct host1x_job_unpin_data { - struct host1x_bo *bo; - struct sg_table *sgt; - struct device *dev; - size_t size; - enum dma_data_direction dir; + struct host1x_bo_mapping *map; }; /*