Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Add sub-nvgpu_mem
Add an API for creating a special sub-nvgpu_mem struct. This struct
comes with some fairly important caveats but is very useful for the
semaphore code.

Also, make sure that nvgpu_mem_begin() and nvgpu_mem_end() make no
additional mappings when they are not necessary. More importantly,
during nvgpu_mem_end() it would otherwise be possible to vunmap() a
CPU mapping of a DMA allocation that does not expect this to happen.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I579429da9ff7288488753a113bafc558e0f17a0f
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1464077
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
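For context, a minimal sketch (not part of this change) of how a caller might
use the new API. The helper name example_make_sub_mem and the sizes chosen are
illustrative only; nvgpu_dma_alloc_sys() and nvgpu_dma_free() are the existing
nvgpu DMA wrappers this API is designed to pair with.

    #include <nvgpu/dma.h>
    #include <nvgpu/nvgpu_mem.h>

    /* Illustrative sketch only: carve a one-page sub-allocation out of
     * a larger parent buffer. */
    static int example_make_sub_mem(struct gk20a *g)
    {
            struct nvgpu_mem parent, sub;
            int err;

            /* 4 pages of plain sysmem; flags == 0, so the DMA API also
             * creates a CPU mapping. */
            err = nvgpu_dma_alloc_sys(g, 4 * PAGE_SIZE, &parent);
            if (err)
                    return err;

            /* Describe page 2 of the parent as its own nvgpu_mem. */
            err = nvgpu_mem_create_from_mem(g, &sub, &parent, 2, 1);
            if (err)
                    goto fail;

            /* ... use sub ... */

            /* Ordering matters: the sub-mem must be freed before its
             * parent, since nvgpu_mem structs are not ref-counted. The
             * SHADOW_COPY flag makes this free skip the parent's CPU
             * mapping and pages. */
            nvgpu_dma_free(g, &sub);
    fail:
            nvgpu_dma_free(g, &parent);
            return err;
    }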
Committed by: mobile promotions
Parent: 39524b0941
Commit: 6a14d980cf
@@ -334,7 +334,8 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	struct device *d = dev_from_gk20a(g);
 
-	if (mem->cpu_va || mem->priv.pages) {
+	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
+	    (mem->cpu_va || mem->priv.pages)) {
 		if (mem->priv.flags) {
 			DEFINE_DMA_ATTRS(dma_attrs);
 
@@ -14,6 +14,7 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <nvgpu/dma.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
 #include <nvgpu/log.h>
@@ -52,6 +53,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return 0;
 
+	/*
+	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
+	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
+	 * another CPU mapping.
+	 */
+	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
+		return 0;
+
 	if (WARN_ON(mem->cpu_va)) {
 		nvgpu_warn(g, "nested");
 		return -EBUSY;
@@ -73,6 +82,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return;
 
+	/*
+	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
+	 * already made by the DMA API.
+	 */
+	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
+		return;
+
 	vunmap(mem->cpu_va);
 	mem->cpu_va = NULL;
 }
@@ -225,3 +241,57 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 		WARN_ON("Accessing unallocated nvgpu_mem");
 	}
 }
+
+/*
+ * Be careful how you use this! You are responsible for correctly freeing this
+ * memory.
+ */
+int nvgpu_mem_create_from_mem(struct gk20a *g,
+			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
+			      int start_page, int nr_pages)
+{
+	int ret;
+	u64 start = start_page * PAGE_SIZE;
+	u64 size = nr_pages * PAGE_SIZE;
+	dma_addr_t new_iova;
+
+	if (src->aperture != APERTURE_SYSMEM)
+		return -EINVAL;
+
+	/* Some silly things a caller might do... */
+	if (size > src->size)
+		return -EINVAL;
+	if ((start + size) > src->size)
+		return -EINVAL;
+
+	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
+	dest->aperture = src->aperture;
+	dest->skip_wmb = src->skip_wmb;
+	dest->size = size;
+
+	/*
+	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
+	 */
+	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
+		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
+
+	dest->priv.pages = src->priv.pages + start_page;
+	dest->priv.flags = src->priv.flags;
+
+	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
+		sg_dma_address(src->priv.sgt->sgl) + start : 0;
+
+	/*
+	 * Make a new SG table that is based only on the subset of pages that
+	 * is passed to us. This table gets freed by the dma free routines.
+	 */
+	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+		ret = gk20a_get_sgtable_from_pages(g->dev, &dest->priv.sgt,
+					src->priv.pages + start_page,
+					new_iova, size);
+	else
+		ret = gk20a_get_sgtable(g->dev, &dest->priv.sgt, dest->cpu_va,
+					new_iova, size);
+
+	return ret;
+}
@@ -48,6 +48,13 @@ struct nvgpu_mem {
 	u64			gpu_va;
 	bool			skip_wmb;
 
+	/*
+	 * Set when a nvgpu_mem struct is not a "real" nvgpu_mem struct. Instead
+	 * the struct is just a copy of another nvgpu_mem struct.
+	 */
+#define NVGPU_MEM_FLAG_SHADOW_COPY	(1 << 0)
+	unsigned long		mem_flags;
+
 	/*
 	 * Only populated for a sysmem allocation.
 	 */
@@ -86,6 +93,42 @@ static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
 	return "UNKNOWN";
 }
 
+/**
+ * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
+ *
+ * @g - The GPU.
+ * @dest - Destination nvgpu_mem to hold resulting memory description.
+ * @src - Source memory. Must be valid.
+ * @start_page - Starting page to use.
+ * @nr_pages - Number of pages to place in the new nvgpu_mem.
+ *
+ * Create a new nvgpu_mem struct describing a subsection of the @src nvgpu_mem.
+ * This will create an nvgpu_mem object starting at @start_page that is
+ * @nr_pages long. This currently only works on SYSMEM nvgpu_mems. If this is
+ * called on a VIDMEM nvgpu_mem then this will return an error.
+ *
+ * There is a _major_ caveat to this API: if the source buffer is freed before
+ * the copy is freed then the copy will become invalid. This is a result of how
+ * typical DMA APIs work: we can't call free on the buffer multiple times. Nor
+ * can we call free on parts of a buffer. Thus the only way to ensure that the
+ * entire buffer is actually freed is to call free once on the source buffer.
+ * Since these nvgpu_mem structs are not ref-counted in any way it is up to the
+ * caller of this API to _ensure_ that the resulting nvgpu_mem buffer from this
+ * API is freed before the source buffer. Otherwise there can and will be
+ * memory corruption.
+ *
+ * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the
+ * nvgpu_dma_unmap_free() function depending on whether or not the resulting
+ * nvgpu_mem has been mapped.
+ *
+ * This will return 0 on success. An error is returned if the resulting
+ * nvgpu_mem would not make sense or if a new scatter gather table cannot be
+ * created.
+ */
+int nvgpu_mem_create_from_mem(struct gk20a *g,
+			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
+			      int start_page, int nr_pages);
+
 /*
  * Buffer accessors - wrap between begin() and end() if there is no permanent
  * kernel mapping for this buffer.
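The buffer-accessor comment above describes the begin()/end() contract that the
nvgpu_mem_begin()/nvgpu_mem_end() changes in this patch optimize. As a hedged
sketch of that pattern, assuming the existing nvgpu_mem_wr32() accessor and an
illustrative helper name example_poke_word():

    #include <nvgpu/nvgpu_mem.h>

    /* Illustrative sketch only: wrap a CPU access in begin()/end(). With
     * this change both calls are no-ops for sysmem that the DMA API has
     * already mapped, and only create/destroy a temporary vmap() when the
     * buffer was allocated with NVGPU_DMA_NO_KERNEL_MAPPING. */
    static int example_poke_word(struct gk20a *g, struct nvgpu_mem *mem)
    {
            int err = nvgpu_mem_begin(g, mem);

            if (err)
                    return err;

            /* Write 0xcafe to word 0 of the buffer. */
            nvgpu_mem_wr32(g, mem, 0, 0xcafe);

            nvgpu_mem_end(g, mem);
            return 0;
    }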