Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add vidmem allocation ioctl
Add NVGPU_GPU_IOCTL_ALLOC_VIDMEM to the ctrl fd for letting userspace
allocate on-board GPU memory (aka vidmem). The allocations are returned
as dmabuf fds. Also, report the amount of local video memory in the gpu
characteristics.

Jira DNVGPU-19
Jira DNVGPU-38
Change-Id: I28e361d31bb630b96d06bb1c86d022d91c7592bc
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1181152
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Committed by Vijayakumar Subbu. Parent commit 83071083d7, this commit 1323100667.
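For orientation, here is a minimal userspace sketch of the new call: open the gk20a ctrl node, fill in struct nvgpu_gpu_alloc_vidmem_args, and issue NVGPU_GPU_IOCTL_ALLOC_VIDMEM; the allocation comes back as a dmabuf fd in args.out.dmabuf_fd. The device path and error handling are assumptions, not part of this change.

/* Hypothetical userspace sketch; the ctrl device path is an assumption. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>

int alloc_vidmem_fd(void)
{
	struct nvgpu_gpu_alloc_vidmem_args args;
	int ctrl = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.in.size = 64 << 20;	/* 64 MiB, a multiple of small page size */
	args.in.alignment = 0;		/* 0 denotes small page size */

	if (ioctl(ctrl, NVGPU_GPU_IOCTL_ALLOC_VIDMEM, &args) < 0) {
		close(ctrl);
		return -1;
	}

	close(ctrl);
	return args.out.dmabuf_fd;	/* caller owns the dmabuf fd now */
}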
@@ -704,6 +704,49 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_gpu_alloc_vidmem(struct gk20a *g,
+		struct nvgpu_gpu_alloc_vidmem_args *args)
+{
+	u32 align = args->in.alignment ? args->in.alignment : SZ_4K;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK))
+		return -EINVAL;
+
+	/* not yet supported */
+	if (WARN_ON(args->in.flags & NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR))
+		return -EINVAL;
+
+	if (args->in.size & (SZ_4K - 1))
+		return -EINVAL;
+
+	if (!args->in.size)
+		return -EINVAL;
+
+	if (align & (align - 1))
+		return -EINVAL;
+
+	if (align > roundup_pow_of_two(args->in.size)) {
+		/* log this special case, buddy allocator detail */
+		gk20a_warn(dev_from_gk20a(g),
+			"alignment larger than buffer size rounded up to power of 2 is not supported");
+		return -EINVAL;
+	}
+
+	fd = gk20a_vidmem_buf_alloc(g, args->in.size);
+	if (fd < 0)
+		return fd;
+
+	args->out.dmabuf_fd = fd;
+
+	gk20a_dbg_fn("done, fd=%d", fd);
+
+	return 0;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct device *dev = filp->private_data;
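The validation above encodes a buddy-allocator constraint: the requested alignment may not exceed the buffer size rounded up to the next power of two. A standalone illustration of the same rules (a sketch, not driver code):

/* Illustration of the validation rules above: size must be a 4K
 * multiple, alignment a power of two no larger than the size rounded
 * up to a power of two. */
#include <stdbool.h>
#include <stdint.h>

static uint64_t roundup_pow2(uint64_t x)
{
	uint64_t p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

static bool vidmem_request_valid(uint64_t size, uint32_t align)
{
	if (!align)
		align = 4096;			/* 0 denotes small page size */
	if (!size || (size & 4095))
		return false;			/* size must be a 4K multiple */
	if (align & (align - 1))
		return false;			/* alignment must be a power of two */
	return align <= roundup_pow2(size);	/* buddy allocator limit */
}

/* Examples: vidmem_request_valid(0x6000, 0x8000)  -> true
 *           (24 KiB rounds up to 32 KiB, so 32 KiB alignment is ok)
 *           vidmem_request_valid(0x6000, 0x10000) -> false            */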
@@ -951,6 +994,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_get_engine_info_args *)buf);
 		break;
 
+	case NVGPU_GPU_IOCTL_ALLOC_VIDMEM:
+		err = nvgpu_gpu_alloc_vidmem(g,
+			(struct nvgpu_gpu_alloc_vidmem_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -2148,6 +2148,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->default_compute_preempt_mode =
 		g->gr.preemption_mode_rec.default_compute_preempt_mode;
 
+	gpu->local_video_memory_size = g->mm.vidmem.size;
 
 	return 0;
 }
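With this, userspace can size its vidmem usage up front. A hedged sketch of reading the new field through NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, continuing from the earlier sketch's ctrl fd (the get-characteristics argument struct and its field names are recalled from the nvgpu uapi header and should be treated as assumptions):

/* Sketch: query gpu characteristics and print the vidmem size.
 * Field names of struct nvgpu_gpu_get_characteristics are assumed. */
struct nvgpu_gpu_characteristics chars;
struct nvgpu_gpu_get_characteristics request;

memset(&chars, 0, sizeof(chars));
memset(&request, 0, sizeof(request));
request.gpu_characteristics_buf_size = sizeof(chars);
request.gpu_characteristics_buf_addr = (uintptr_t)&chars;

if (ioctl(ctrl, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &request) == 0)
	printf("local video memory: %llu bytes\n",
	       (unsigned long long)chars.local_video_memory_size);
/* zero means an iGPU sharing system memory, per the new field comment */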
@@ -411,6 +411,14 @@ struct gk20a_dmabuf_priv {
 	u64 buffer_id;
 };
 
+struct gk20a_vidmem_buf {
+	struct gk20a *g;
+	struct mem_desc mem;
+	struct dma_buf *dmabuf;
+	void *dmabuf_priv;
+	void (*dmabuf_priv_delete)(void *);
+};
+
 static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm);
 
 static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator,
@@ -1833,6 +1841,146 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	return mapped_buffer->addr;
 }
 
+#if defined(CONFIG_GK20A_VIDMEM)
+static struct sg_table *gk20a_vidbuf_map_dma_buf(
+	struct dma_buf_attachment *attach, enum dma_data_direction dir)
+{
+	struct gk20a_vidmem_buf *buf = attach->dmabuf->priv;
+
+	return buf->mem.sgt;
+}
+
+static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
+		struct sg_table *sgt,
+		enum dma_data_direction dir)
+{
+}
+
+static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	gk20a_dbg_fn("");
+
+	if (buf->dmabuf_priv)
+		buf->dmabuf_priv_delete(buf->dmabuf_priv);
+
+	gk20a_gmmu_free(buf->g, &buf->mem);
+	kfree(buf);
+}
+
+static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
+		unsigned long page_num)
+{
+	WARN_ON("Not supported");
+	return NULL;
+}
+
+static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+static int gk20a_vidbuf_set_private(struct dma_buf *dmabuf,
+		struct device *dev, void *priv, void (*delete)(void *priv))
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	buf->dmabuf_priv = priv;
+	buf->dmabuf_priv_delete = delete;
+
+	return 0;
+}
+
+static void *gk20a_vidbuf_get_private(struct dma_buf *dmabuf,
+		struct device *dev)
+{
+	struct gk20a_vidmem_buf *buf = dmabuf->priv;
+
+	return buf->dmabuf_priv;
+}
+
+static const struct dma_buf_ops gk20a_vidbuf_ops = {
+	.map_dma_buf = gk20a_vidbuf_map_dma_buf,
+	.unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
+	.release = gk20a_vidbuf_release,
+	.kmap_atomic = gk20a_vidbuf_kmap_atomic,
+	.kmap = gk20a_vidbuf_kmap,
+	.mmap = gk20a_vidbuf_mmap,
+	.set_drvdata = gk20a_vidbuf_set_private,
+	.get_drvdata = gk20a_vidbuf_get_private,
+};
+
+static struct dma_buf *gk20a_vidbuf_export(struct gk20a_vidmem_buf *buf)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.priv = buf;
+	exp_info.ops = &gk20a_vidbuf_ops;
+	exp_info.size = buf->mem.size;
+	exp_info.flags = O_RDWR;
+
+	return dma_buf_export(&exp_info);
+#else
+	return dma_buf_export(buf, &gk20a_vidbuf_ops, buf->mem.size,
+			O_RDWR, NULL);
+#endif
+}
+#endif
+
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	struct gk20a_vidmem_buf *buf;
+	int err, fd;
+
+	gk20a_dbg_fn("");
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->g = g;
+
+	err = gk20a_gmmu_alloc_vid(g, bytes, &buf->mem);
+	if (err)
+		goto err_kfree;
+
+	buf->dmabuf = gk20a_vidbuf_export(buf);
+	if (IS_ERR(buf->dmabuf)) {
+		err = PTR_ERR(buf->dmabuf);
+		goto err_bfree;
+	}
+
+	fd = get_unused_fd_flags(O_RDWR);
+	if (fd < 0) {
+		/* ->release frees what we have done */
+		dma_buf_put(buf->dmabuf);
+		return fd;
+	}
+
+	/* fclose() on this drops one ref, freeing the dma buf */
+	fd_install(fd, buf->dmabuf->file);
+
+	return fd;
+
+err_bfree:
+	gk20a_gmmu_free(g, &buf->mem);
+err_kfree:
+	kfree(buf);
+	return err;
+#else
+	return -ENOSYS;
+#endif
+}
+
 u64 gk20a_vm_map(struct vm_gk20a *vm,
 		struct dma_buf *dmabuf,
 		u64 offset_align,
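Note the ownership model in gk20a_vidmem_buf_alloc(): after fd_install(), the installed file holds the only long-term reference, so the final close() of the fd lands in gk20a_vidbuf_release(), which frees both the vidmem backing store and the tracking struct. In userspace terms, building on the earlier hypothetical sketch:

int fd = alloc_vidmem_fd();	/* from the sketch above */

if (fd >= 0) {
	/* ... share fd with other drivers/processes via dma-buf ... */

	close(fd);	/* last reference dropped: the release op frees
			 * the vidmem allocation and the tracking struct */
}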
@@ -718,6 +718,8 @@ void gk20a_vm_mapping_batch_finish_locked(
 	struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch);
 
+
+int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes);
 
 /* Note: batch may be NULL if map op is not part of a batch */
 int gk20a_vm_map_buffer(struct vm_gk20a *vm,
 		int dmabuf_fd,
@@ -196,6 +196,8 @@ struct nvgpu_gpu_characteristics {
 	__u32 default_graphics_preempt_mode; /* NVGPU_GRAPHICS_PREEMPTION_MODE_* */
 	__u32 default_compute_preempt_mode; /* NVGPU_COMPUTE_PREEMPTION_MODE_* */
 
+	__u64 local_video_memory_size; /* in bytes, non-zero only for dGPUs */
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -434,6 +436,72 @@ struct nvgpu_gpu_get_engine_info_args {
 	__u64 engine_info_buf_addr;
 };
 
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CONTIGUOUS		(1U << 0)
+
+/* CPU access and coherency flags (3 bits). Use CPU access with care,
+ * BAR resources are scarce. */
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_NOT_MAPPABLE	(0U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_WRITE_COMBINE	(1U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_CACHED		(2U << 1)
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_CPU_MASK		(7U << 1)
+
+#define NVGPU_GPU_ALLOC_VIDMEM_FLAG_VPR			(1U << 4)
+
+/* Allocation of device-specific local video memory. Returns dmabuf fd
+ * on success. */
+struct nvgpu_gpu_alloc_vidmem_args {
+	union {
+		struct {
+			/* Size for allocation. Must be a multiple of
+			 * small page size. */
+			__u64 size;
+
+			/* NVGPU_GPU_ALLOC_VIDMEM_FLAG_* */
+			__u32 flags;
+
+			/* Informational mem tag for resource usage
+			 * tracking. */
+			__u16 memtag;
+
+			__u16 reserved0;
+
+			/* GPU-visible physical memory alignment in
+			 * bytes.
+			 *
+			 * Alignment must be a power of two. Minimum
+			 * alignment is the small page size, which 0
+			 * also denotes.
+			 *
+			 * For contiguous and non-contiguous
+			 * allocations, the start address of the
+			 * physical memory allocation will be aligned
+			 * by this value.
+			 *
+			 * For non-contiguous allocations, memory is
+			 * internally allocated in round_up(size /
+			 * alignment) contiguous blocks. The start
+			 * address of each block is aligned by the
+			 * alignment value. If the size is not a
+			 * multiple of alignment (which is ok), the
+			 * last allocation block size is (size %
+			 * alignment).
+			 *
+			 * By specifying the big page size here and
+			 * allocation size that is a multiple of big
+			 * pages, it will be guaranteed that the
+			 * allocated buffer is big page size mappable.
+			 */
+			__u32 alignment;
+
+			__u32 reserved1[3];
+		} in;
+
+		struct {
+			__s32 dmabuf_fd;
+		} out;
+	};
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
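To make the alignment comment concrete with a worked example: a non-contiguous request with size = 10 MiB and alignment = 4 MiB is carved into round_up(10M / 4M) = 3 contiguous blocks of 4 MiB, 4 MiB, and (size % alignment) = 2 MiB, each starting on a 4 MiB boundary. Rounding the size up to 12 MiB instead would make every block a full 4 MiB, which is what the big-page-mappability guarantee at the end of the comment relies on.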
@@ -489,8 +557,11 @@ struct nvgpu_gpu_get_engine_info_args {
 #define NVGPU_GPU_IOCTL_GET_ENGINE_INFO	\
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 26, \
 			struct nvgpu_gpu_get_engine_info_args)
+#define NVGPU_GPU_IOCTL_ALLOC_VIDMEM	\
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
+			struct nvgpu_gpu_alloc_vidmem_args)
 #define NVGPU_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_GPU_IOCTL_GET_ENGINE_INFO)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_VIDMEM)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
 