linux-nv-oot/drivers/video/tegra/host/nvdla/dla_queue.c
Arvind M 29ec18d2bc nvdla: kmd: fix copyright header license to GPL v2
Bug 5065840

Change-Id: I2012645108457974e24d3c8799ee9412393408e1
Signed-off-by: Arvind M <am@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3291589
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Vishal Thoke <vthoke@nvidia.com>
Reviewed-by: Amit Sharma (SW-TEGRA) <amisharma@nvidia.com>
Reviewed-by: Ken Adams <kadams@nvidia.com>
2025-07-24 10:19:14 +00:00


// SPDX-License-Identifier: GPL-2.0-only
/* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVDLA queue management
*/
#include <linux/arm64-barrier.h>
#include <linux/platform_device.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include <linux/debugfs.h>
#include "port/nvdla_host_wrapper.h"
#if IS_ENABLED(CONFIG_TEGRA_NVDLA_CHANNEL)
#include "nvhost_job.h"
#endif
#include "dla_channel.h"
#include "dla_queue.h"
#include "nvdla_debug.h"
#define CMDBUF_SIZE 4096
/**
* @brief Per-queue task memory pool descriptor
*
* An array of fixed-size task memory is allocated during the queue_alloc
* call. The memory is shared among tasks based on availability.
*
* dma_addr      DMA address of the task memory pool
* va            Virtual address of the task memory pool
* kmem_addr     Kernel memory for the task structs
* lock          Mutex serializing access to the pool
* alloc_table   Bitmap tracking which task slots are assigned and freed
* max_task_cnt  Maximum number of tasks that can be supported
*/
struct nvdla_queue_task_pool {
dma_addr_t dma_addr;
void *va;
void *kmem_addr;
struct mutex lock;
unsigned long alloc_table;
unsigned long max_task_cnt;
};
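/**
* @brief Allocate the per-queue task memory pool
*
* Allocates an optional kernel memory pool (task_kmem_size bytes per task)
* and a DMA buffer (task_dma_size bytes per task) visible to the device,
* then records the pool capacity and initializes the pool lock.
*/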
static int nvdla_queue_task_pool_alloc(struct platform_device *pdev,
struct nvdla_queue *queue,
unsigned int num_tasks)
{
int err = 0;
struct nvdla_queue_task_pool *task_pool;
task_pool = queue->task_pool;
/* Allocate the kernel memory needed for the task */
if (queue->task_kmem_size) {
size_t kmem_pool_size = num_tasks * queue->task_kmem_size;
task_pool->kmem_addr = vzalloc(kmem_pool_size);
if (!task_pool->kmem_addr) {
dev_err(&pdev->dev,
"failed to allocate task_pool->kmem_addr\n");
err = -ENOMEM;
goto err_alloc_task_kmem;
}
}
/* Allocate memory for the task itself */
task_pool->va = dma_alloc_attrs(&pdev->dev,
queue->task_dma_size * num_tasks,
&task_pool->dma_addr, GFP_KERNEL,
0);
if (task_pool->va == NULL) {
dev_err(&pdev->dev, "failed to allocate task_pool->va\n");
err = -ENOMEM;
goto err_alloc_task_pool;
}
task_pool->max_task_cnt = num_tasks;
mutex_init(&task_pool->lock);
return err;
err_alloc_task_pool:
vfree(task_pool->kmem_addr);
err_alloc_task_kmem:
return err;
}
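/**
* @brief Release the per-queue task memory pool
*
* Frees the DMA buffer and the kernel memory pool allocated by
* nvdla_queue_task_pool_alloc() and resets the allocation bitmap.
*/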
static void nvdla_queue_task_free_pool(struct platform_device *pdev,
struct nvdla_queue *queue)
{
struct nvdla_queue_task_pool *task_pool =
(struct nvdla_queue_task_pool *)queue->task_pool;
dma_free_attrs(&queue->vm_pdev->dev,
queue->task_dma_size * task_pool->max_task_cnt,
task_pool->va, task_pool->dma_addr,
0);
vfree(task_pool->kmem_addr);
task_pool->max_task_cnt = 0;
task_pool->alloc_table = 0;
}
static int nvdla_queue_dump(struct nvdla_queue_pool *pool,
struct nvdla_queue *queue,
struct seq_file *s)
{
if (pool->ops && pool->ops->dump)
pool->ops->dump(queue, s);
return 0;
}
static int queue_dump(struct seq_file *s, void *data)
{
struct nvdla_queue_pool *pool = s->private;
unsigned long queue_id;
mutex_lock(&pool->queue_lock);
for_each_set_bit(queue_id, &pool->alloc_table,
pool->max_queue_cnt)
nvdla_queue_dump(pool, &pool->queues[queue_id], s);
mutex_unlock(&pool->queue_lock);
return 0;
}
static int queue_expose_open(struct inode *inode, struct file *file)
{
return single_open(file, queue_dump, inode->i_private);
}
static const struct file_operations queue_expose_operations = {
.open = queue_expose_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
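/**
* @brief Initialize a queue pool for a device
*
* Allocates the pool, the queue array and the per-queue task pool
* descriptors, queries the task memory sizes for each queue and exposes
* the pool state through a "queues" debugfs node.
*/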
struct nvdla_queue_pool *nvdla_queue_init(struct platform_device *pdev,
struct nvdla_queue_ops *ops,
unsigned int num_queues)
{
struct nvhost_device_data *pdata;
struct nvdla_queue_pool *pool;
struct nvdla_queue *queues;
struct nvdla_queue *queue;
struct nvdla_queue_task_pool *task_pool;
unsigned int i;
int err;
pool = kzalloc(sizeof(struct nvdla_queue_pool), GFP_KERNEL);
if (pool == NULL) {
dev_err(&pdev->dev, "failed to allocate queue pool\n");
err = -ENOMEM;
goto fail_alloc_pool;
}
queues = kcalloc(num_queues, sizeof(struct nvdla_queue), GFP_KERNEL);
if (queues == NULL) {
dev_err(&pdev->dev, "failed to allocate queues\n");
err = -ENOMEM;
goto fail_alloc_queues;
}
task_pool = kcalloc(num_queues,
sizeof(struct nvdla_queue_task_pool), GFP_KERNEL);
if (task_pool == NULL) {
dev_err(&pdev->dev, "failed to allocate task_pool\n");
err = -ENOMEM;
goto fail_alloc_task_pool;
}
pdata = platform_get_drvdata(pdev);
/* initialize pool and queues */
pool->pdev = pdev;
pool->ops = ops;
pool->queues = queues;
pool->alloc_table = 0;
pool->max_queue_cnt = num_queues;
pool->queue_task_pool = task_pool;
mutex_init(&pool->queue_lock);
debugfs_create_file("queues", S_IRUGO,
pdata->debugfs, pool,
&queue_expose_operations);
for (i = 0; i < num_queues; i++) {
queue = &queues[i];
queue->id = i;
queue->pool = pool;
queue->task_pool = (void *)&task_pool[i];
nvdla_queue_get_task_size(queue);
}
spec_bar(); /* break_spec_p#5_1 */
return pool;
fail_alloc_task_pool:
kfree(pool->queues);
fail_alloc_queues:
kfree(pool);
fail_alloc_pool:
return ERR_PTR(err);
}
void nvdla_queue_deinit(struct nvdla_queue_pool *pool)
{
if (!pool)
return;
kfree(pool->queue_task_pool);
kfree(pool->queues);
kfree(pool);
pool = NULL;
}
static void nvdla_queue_cleanup(struct nvdla_queue *queue)
{
struct nvdla_queue_pool *pool = queue->pool;
if (pool->ops && pool->ops->cleanup)
pool->ops->cleanup(queue);
}
static void nvdla_queue_cleanup_all(struct nvdla_queue_pool *pool)
{
u32 id;
mutex_lock(&pool->queue_lock);
for_each_set_bit(id, &pool->alloc_table, pool->max_queue_cnt)
nvdla_queue_cleanup(&pool->queues[id]);
mutex_unlock(&pool->queue_lock);
}
#ifdef CONFIG_PM
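/**
* @brief Check that the pool is idle before entering suspend
*
* Returns -EBUSY if any allocated queue still has outstanding tasks after
* a cleanup pass; otherwise returns 0.
*/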
int nvdla_queue_pool_prepare_suspend(struct nvdla_queue_pool *qpool)
{
int err = 0;
unsigned int queue_id;
mutex_lock(&qpool->queue_lock);
/* For each active queue, ensure there are no outstanding tasks. */
for_each_set_bit(queue_id, &qpool->alloc_table, qpool->max_queue_cnt) {
struct nvdla_queue *queue = &qpool->queues[queue_id];
struct nvdla_queue_task_pool *tpool = queue->task_pool;
bool nvdla_queue_is_idle;
/* Clean up the queue before checking for idleness. */
nvdla_queue_cleanup(queue);
mutex_lock(&tpool->lock);
nvdla_queue_is_idle = (tpool->alloc_table == 0ULL);
mutex_unlock(&tpool->lock);
if (!nvdla_queue_is_idle) {
err = -EBUSY;
goto fail_busy;
}
}
fail_busy:
mutex_unlock(&qpool->queue_lock);
return err;
}
#endif /* CONFIG_PM */
void nvdla_queue_abort_all(struct nvdla_queue_pool *pool)
{
u32 id;
mutex_lock(&pool->queue_lock);
for_each_set_bit(id, &pool->alloc_table, pool->max_queue_cnt)
nvdla_queue_abort(&pool->queues[id]);
mutex_unlock(&pool->queue_lock);
}
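/**
* @brief Final release callback for a queue reference
*
* Called when the last reference is dropped: releases the channel (if
* used), destroys the sync context, frees the task pool and marks the
* queue slot free in the pool allocation table.
*/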
static void nvdla_queue_release(struct kref *ref)
{
struct nvdla_queue *queue = container_of(ref, struct nvdla_queue,
kref);
struct nvdla_queue_pool *pool = queue->pool;
nvdla_dbg_fn(pool->pdev, "%s\n", __func__);
if (queue->use_channel)
nvdla_putchannel(queue);
/* release allocated resources */
nvdla_sync_destroy(queue->sync_context);
/* free the task_pool */
if (queue->task_dma_size)
nvdla_queue_task_free_pool(pool->pdev, queue);
/* ..and mark the queue free */
mutex_lock(&pool->queue_lock);
clear_bit(queue->id, &pool->alloc_table);
mutex_unlock(&pool->queue_lock);
}
void nvdla_queue_put(struct nvdla_queue *queue)
{
nvdla_dbg_fn(queue->pool->pdev, "%s\n", __func__);
kref_put(&queue->kref, nvdla_queue_release);
}
void nvdla_queue_get(struct nvdla_queue *queue)
{
nvdla_dbg_fn(queue->pool->pdev, "%s\n", __func__);
kref_get(&queue->kref);
}
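/**
* @brief Reserve a free queue from the pool
*
* Picks the first free slot in the allocation table, retrying once after a
* cleanup pass. The reserved queue then gets a sync context, an optional
* host1x channel mapping and, if task_dma_size is set, a pre-allocated
* task memory pool sized for num_tasks entries.
*/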
struct nvdla_queue *nvdla_queue_alloc(struct nvdla_queue_pool *pool,
unsigned int num_tasks,
bool use_channel)
{
struct platform_device *pdev = pool->pdev;
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
struct nvdla_device *nvdla_dev = pdata->private_data;
struct nvdla_queue *queues = pool->queues;
struct nvdla_queue *queue;
int index = 0;
int err = 0;
mutex_lock(&pool->queue_lock);
index = find_first_zero_bit(&pool->alloc_table,
pool->max_queue_cnt);
/* No free queue found on the first attempt. */
if (index >= pool->max_queue_cnt) {
mutex_unlock(&pool->queue_lock);
/* Cleanup and retry one more time before erroring out */
nvdla_queue_cleanup_all(pool);
mutex_lock(&pool->queue_lock);
index = find_first_zero_bit(&pool->alloc_table,
pool->max_queue_cnt);
if (index >= pool->max_queue_cnt) {
dev_err(&pdev->dev, "failed to get free Queue\n");
err = -ENOMEM;
goto err_alloc_queue;
}
}
spec_bar(); /* break_spec_p#1 */
/* reserve the queue */
queue = &queues[index];
set_bit(index, &pool->alloc_table);
/* allocate a sync context for the queue */
queue->sync_context = nvdla_sync_create(nvdla_dev->sync_dev);
if (queue->sync_context == NULL) {
dev_err(&pdev->dev, "failed to create sync context\n");
err = -ENOMEM;
goto err_alloc_sync;
}
/* initialize queue ref count and sequence */
kref_init(&queue->kref);
queue->use_channel = use_channel;
queue->sequence = 0;
/* initialize task list */
INIT_LIST_HEAD(&queue->tasklist);
mutex_init(&queue->list_lock);
/* initialize queue attribute */
queue->attr = NULL;
mutex_init(&queue->attr_lock);
mutex_unlock(&pool->queue_lock);
/* Check if the queue should allocate a channel */
if (use_channel) {
queue->vm_pdev = nvdla_channel_map(pdev, queue);
if (!queue->vm_pdev) {
err = -ENOMEM;
goto err_alloc_channel;
}
} else {
queue->vm_pdev = pdev;
}
if (queue->task_dma_size) {
err = nvdla_queue_task_pool_alloc(queue->vm_pdev,
queue,
num_tasks);
if (err < 0)
goto err_alloc_task_pool;
}
return queue;
err_alloc_task_pool:
if (use_channel)
nvdla_putchannel(queue);
err_alloc_channel:
mutex_lock(&pool->queue_lock);
nvdla_sync_destroy(queue->sync_context);
err_alloc_sync:
clear_bit(queue->id, &pool->alloc_table);
err_alloc_queue:
mutex_unlock(&pool->queue_lock);
return ERR_PTR(err);
}
int nvdla_queue_abort(struct nvdla_queue *queue)
{
struct nvdla_queue_pool *pool = queue->pool;
if (pool->ops && pool->ops->abort)
return pool->ops->abort(queue);
return 0;
}
int nvdla_queue_submit(struct nvdla_queue *queue, void *task_arg)
{
struct nvdla_queue_pool *pool = queue->pool;
if (pool->ops && pool->ops->submit)
return pool->ops->submit(queue, task_arg);
return 0;
}
int nvdla_queue_set_attr(struct nvdla_queue *queue, void *arg)
{
struct nvdla_queue_pool *pool = queue->pool;
if (pool->ops && pool->ops->set_attribute)
return pool->ops->set_attribute(queue, arg);
return 0;
}
struct nvdla_queue_task {
struct platform_device *host1x_pdev;
struct nvdla_queue *queue;
dma_addr_t dma_addr;
u32 *cpu_addr;
};
#if IS_ENABLED(CONFIG_TEGRA_NVDLA_CHANNEL)
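/* host1x completion notifier: free the task command buffer and descriptor */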
static void queue_task_update(void *priv, int nr_completed)
{
struct nvdla_queue_task *task = priv;
struct platform_device *host1x_pdev = task->host1x_pdev;
dma_free_coherent(&host1x_pdev->dev,
CMDBUF_SIZE,
task->cpu_addr,
task->dma_addr);
kfree(task);
}
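/**
* @brief Submit a command buffer to the engine through host1x
*
* Copies the caller's command buffer into a freshly allocated coherent
* buffer, builds an nvhost job with the requested syncpoint waits and
* increments, submits it on the queue channel and registers a completion
* notifier that frees the buffer. The job fence is returned through
* task_syncpt_threshold.
*/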
int nvdla_queue_submit_to_host1x(struct nvdla_queue *queue,
u32 *cmdbuf,
u32 num_cmdbuf_words,
u32 num_syncpt_incrs,
u32 *wait_syncpt_ids,
u32 *wait_syncpt_thresholds,
u32 num_syncpt_waits,
u32 *task_syncpt_threshold)
{
struct nvdla_queue_pool *pool = queue->pool;
struct platform_device *client_pdev = pool->pdev;
struct platform_device *host1x_pdev =
to_platform_device(client_pdev->dev.parent);
struct nvhost_device_data *pdata = platform_get_drvdata(client_pdev);
struct nvdla_queue_task *task;
struct nvhost_job *job;
unsigned int i;
int err = 0;
if (queue->use_channel == false)
return -EINVAL;
/* Allocate memory for the task and task command buffer */
task = kzalloc(sizeof(*task), GFP_KERNEL);
if (task == NULL) {
dev_err(&client_pdev->dev, "failed to allocate task\n");
err = -ENOMEM;
goto err_alloc_task;
}
task->cpu_addr = dma_alloc_coherent(&host1x_pdev->dev,
CMDBUF_SIZE,
&task->dma_addr,
GFP_KERNEL);
if (task->cpu_addr == NULL) {
dev_err(&client_pdev->dev, "failed to allocate command buffer\n");
err = -ENOMEM;
goto err_alloc_cmdbuf;
}
/* Copy the command buffer */
memcpy(task->cpu_addr, cmdbuf, num_cmdbuf_words * 4);
job = nvhost_job_alloc(queue->channel,
1,
0,
num_syncpt_waits,
1);
if (job == NULL) {
err = -ENOMEM;
goto err_alloc_job;
}
task->queue = queue;
task->host1x_pdev = host1x_pdev;
/* Write waits to the job */
job->num_waitchk = num_syncpt_waits;
for (i = 0; i < num_syncpt_waits; i++) {
job->waitchk[i].syncpt_id = wait_syncpt_ids[i];
job->waitchk[i].thresh = wait_syncpt_thresholds[i];
job->waitchk[i].mem = 0;
}
spec_bar(); /* break_spec_p#5_1 */
/* Initialize syncpoint increments */
job->sp->id = queue->syncpt_id;
job->sp->incrs = num_syncpt_incrs;
job->num_syncpts = 1;
/* Add the command buffer */
nvhost_job_add_client_gather_address(job,
num_cmdbuf_words,
pdata->class,
task->dma_addr);
/* Submit task to hardware */
err = nvhost_channel_submit(job);
if (err < 0)
goto err_submit_job;
/* Return the syncpoint threshold for task completion to the caller */
*task_syncpt_threshold = job->sp->fence;
/* Register a callback function for releasing resources */
err = nvhost_intr_register_notifier(host1x_pdev,
queue->syncpt_id,
job->sp->fence,
queue_task_update, task);
if (err < 0) {
dev_err(&client_pdev->dev, "failed to register notifier err=%d",
err);
goto err_register_notifier;
}
/* nvhost keeps a reference on the job and we don't
* need to access it anymore
*/
nvhost_job_put(job);
return 0;
err_register_notifier:
err_submit_job:
nvhost_job_put(job);
err_alloc_job:
dma_free_coherent(&host1x_pdev->dev, CMDBUF_SIZE, task->cpu_addr,
task->dma_addr);
err_alloc_cmdbuf:
kfree(task);
err_alloc_task:
return err;
}
#endif
int nvdla_queue_get_task_size(struct nvdla_queue *queue)
{
struct nvdla_queue_pool *pool = queue->pool;
if (pool->ops && pool->ops->get_task_size)
pool->ops->get_task_size(&queue->task_dma_size,
&queue->task_kmem_size);
return 0;
}
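/**
* @brief Reserve one slot from the queue's pre-allocated task pool
*
* Finds a free index in the pool bitmap and returns the corresponding
* kernel, virtual and DMA addresses through task_mem_info. Returns
* -EAGAIN if all slots are currently in use.
*/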
int nvdla_queue_alloc_task_memory(
struct nvdla_queue *queue,
struct nvdla_queue_task_mem_info *task_mem_info)
{
int err = 0;
int index, hw_offset, sw_offset;
struct platform_device *pdev = queue->pool->pdev;
struct nvdla_queue_task_pool *task_pool =
(struct nvdla_queue_task_pool *)queue->task_pool;
mutex_lock(&task_pool->lock);
index = find_first_zero_bit(&task_pool->alloc_table,
task_pool->max_task_cnt);
/* bail out if no slot in the pre-allocated task array is free */
if (index >= task_pool->max_task_cnt) {
dev_err(&pdev->dev, "failed to get Task Pool Memory\n");
err = -EAGAIN;
goto err_alloc_task_mem;
}
/* assign the task array */
set_bit(index, &task_pool->alloc_table);
hw_offset = index * queue->task_dma_size;
sw_offset = index * queue->task_kmem_size;
task_mem_info->kmem_addr =
(void *)((u8 *)task_pool->kmem_addr + sw_offset);
task_mem_info->va = (void *)((u8 *)task_pool->va + hw_offset);
task_mem_info->dma_addr = task_pool->dma_addr + hw_offset;
task_mem_info->pool_index = index;
err_alloc_task_mem:
mutex_unlock(&task_pool->lock);
return err;
}
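/**
* @brief Return a task slot to the queue's task pool
*
* Clears the slot's kernel and DMA memory contents and marks the index
* free in the pool bitmap.
*/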
void nvdla_queue_free_task_memory(struct nvdla_queue *queue, int index)
{
int hw_offset, sw_offset;
u8 *task_kmem, *task_dma_va;
struct nvdla_queue_task_pool *task_pool =
(struct nvdla_queue_task_pool *)queue->task_pool;
/* clear task kernel and DMA virtual memory contents */
hw_offset = index * queue->task_dma_size;
sw_offset = index * queue->task_kmem_size;
task_kmem = (u8 *)task_pool->kmem_addr + sw_offset;
task_dma_va = (u8 *)task_pool->va + hw_offset;
memset(task_kmem, 0, queue->task_kmem_size);
memset(task_dma_va, 0, queue->task_dma_size);
mutex_lock(&task_pool->lock);
clear_bit(index, &task_pool->alloc_table);
mutex_unlock(&task_pool->lock);
}