// SPDX-License-Identifier: GPL-2.0-only
/* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 * NVDLA queue management
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/platform_device.h>
#include <linux/err.h>
#include "port/nvdla_host_wrapper.h"

#if IS_ENABLED(CONFIG_TEGRA_NVDLA_CHANNEL)
#include "nvhost_job.h"
#endif

#include "dla_channel.h"
#include "dla_queue.h"
#include "nvdla_debug.h"

#define CMDBUF_SIZE	4096

/**
 * @brief	Describe a task pool struct
 *
 * Array of fixed task memory is allocated during the queue_alloc call.
 * The memory is shared among tasks based on availability.
 *
 * dma_addr		Physical address of task memory pool
 * va			Virtual address of the task memory pool
 * kmem_addr		Kernel memory for task struct
 * lock			Mutex lock for the array access.
 * alloc_table		Keep track of the index being assigned
 *			and freed for a task
 * max_task_cnt		Maximum task count that can be supported.
 */
struct nvdla_queue_task_pool {
	dma_addr_t dma_addr;
	void *va;
	void *kmem_addr;
	struct mutex lock;

	unsigned long alloc_table;
	unsigned long max_task_cnt;
};

static int nvdla_queue_task_pool_alloc(struct platform_device *pdev,
				       struct nvdla_queue *queue,
				       unsigned int num_tasks)
{
	int err = 0;
	struct nvdla_queue_task_pool *task_pool;

	task_pool = queue->task_pool;

	/* Allocate the kernel memory needed for the task */
	if (queue->task_kmem_size) {
		size_t kmem_pool_size = num_tasks * queue->task_kmem_size;

		task_pool->kmem_addr = vzalloc(kmem_pool_size);
		if (!task_pool->kmem_addr) {
			dev_err(&pdev->dev,
				"failed to allocate task_pool->kmem_addr\n");
			err = -ENOMEM;
			goto err_alloc_task_kmem;
		}
	}

	/* Allocate memory for the task itself */
	task_pool->va = dma_alloc_attrs(&pdev->dev,
					queue->task_dma_size * num_tasks,
					&task_pool->dma_addr, GFP_KERNEL, 0);
	if (task_pool->va == NULL) {
		dev_err(&pdev->dev, "failed to allocate task_pool->va\n");
		err = -ENOMEM;
		goto err_alloc_task_pool;
	}

	task_pool->max_task_cnt = num_tasks;

	mutex_init(&task_pool->lock);

	return err;

err_alloc_task_pool:
	vfree(task_pool->kmem_addr);
err_alloc_task_kmem:
	return err;
}

static void nvdla_queue_task_free_pool(struct platform_device *pdev,
				       struct nvdla_queue *queue)
{
	struct nvdla_queue_task_pool *task_pool =
		(struct nvdla_queue_task_pool *)queue->task_pool;

	dma_free_attrs(&queue->vm_pdev->dev,
		       queue->task_dma_size * task_pool->max_task_cnt,
		       task_pool->va, task_pool->dma_addr, 0);

	vfree(task_pool->kmem_addr);
	task_pool->max_task_cnt = 0;
	task_pool->alloc_table = 0;
}

static int nvdla_queue_dump(struct nvdla_queue_pool *pool,
			    struct nvdla_queue *queue,
			    struct seq_file *s)
{
	if (pool->ops && pool->ops->dump)
		pool->ops->dump(queue, s);

	return 0;
}

static int queue_dump(struct seq_file *s, void *data)
{
	struct nvdla_queue_pool *pool = s->private;
	unsigned long queue_id;

	mutex_lock(&pool->queue_lock);
	for_each_set_bit(queue_id, &pool->alloc_table, pool->max_queue_cnt)
		nvdla_queue_dump(pool, &pool->queues[queue_id], s);
	mutex_unlock(&pool->queue_lock);

	return 0;
}

static int queue_expose_open(struct inode *inode, struct file *file)
{
	return single_open(file, queue_dump, inode->i_private);
}

static const struct file_operations queue_expose_operations = {
	.open = queue_expose_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
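/*
 * Illustrative sketch only: the per-engine pool ops supply the dump callback
 * invoked behind the debugfs "queues" node created below in
 * nvdla_queue_init(). Assuming a void return for the callback, a minimal
 * implementation could look like the following (example_queue_dump and
 * example_queue_ops are hypothetical names, not part of this driver):
 *
 *	static void example_queue_dump(struct nvdla_queue *queue,
 *				       struct seq_file *s)
 *	{
 *		seq_printf(s, "queue %u: sequence %u\n",
 *			   queue->id, queue->sequence);
 *	}
 *
 *	static struct nvdla_queue_ops example_queue_ops = {
 *		.dump = example_queue_dump,
 *		...
 *	};
 */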
struct nvdla_queue_pool *nvdla_queue_init(struct platform_device *pdev,
					  struct nvdla_queue_ops *ops,
					  unsigned int num_queues)
{
	struct nvhost_device_data *pdata;
	struct nvdla_queue_pool *pool;
	struct nvdla_queue *queues;
	struct nvdla_queue *queue;
	struct nvdla_queue_task_pool *task_pool;
	unsigned int i;
	int err;

	pool = kzalloc(sizeof(struct nvdla_queue_pool), GFP_KERNEL);
	if (pool == NULL) {
		dev_err(&pdev->dev, "failed to allocate queue pool\n");
		err = -ENOMEM;
		goto fail_alloc_pool;
	}

	queues = kcalloc(num_queues, sizeof(struct nvdla_queue), GFP_KERNEL);
	if (queues == NULL) {
		dev_err(&pdev->dev, "failed to allocate queues\n");
		err = -ENOMEM;
		goto fail_alloc_queues;
	}

	task_pool = kcalloc(num_queues,
			    sizeof(struct nvdla_queue_task_pool), GFP_KERNEL);
	if (task_pool == NULL) {
		dev_err(&pdev->dev, "failed to allocate task_pool\n");
		err = -ENOMEM;
		goto fail_alloc_task_pool;
	}

	pdata = platform_get_drvdata(pdev);

	/* initialize pool and queues */
	pool->pdev = pdev;
	pool->ops = ops;
	pool->queues = queues;
	pool->alloc_table = 0;
	pool->max_queue_cnt = num_queues;
	pool->queue_task_pool = task_pool;
	mutex_init(&pool->queue_lock);

	debugfs_create_file("queues", S_IRUGO, pdata->debugfs, pool,
			    &queue_expose_operations);

	for (i = 0; i < num_queues; i++) {
		queue = &queues[i];
		queue->id = i;
		queue->pool = pool;
		queue->task_pool = (void *)&task_pool[i];
		nvdla_queue_get_task_size(queue);
	}
	spec_bar(); /* break_spec_p#5_1 */

	return pool;

fail_alloc_task_pool:
	kfree(pool->queues);
fail_alloc_queues:
	kfree(pool);
fail_alloc_pool:
	return ERR_PTR(err);
}

void nvdla_queue_deinit(struct nvdla_queue_pool *pool)
{
	if (!pool)
		return;

	kfree(pool->queue_task_pool);
	kfree(pool->queues);
	kfree(pool);
	pool = NULL;
}

static void nvdla_queue_cleanup(struct nvdla_queue *queue)
{
	struct nvdla_queue_pool *pool = queue->pool;

	if (pool->ops && pool->ops->cleanup)
		pool->ops->cleanup(queue);
}

static void nvdla_queue_cleanup_all(struct nvdla_queue_pool *pool)
{
	u32 id;

	mutex_lock(&pool->queue_lock);
	for_each_set_bit(id, &pool->alloc_table, pool->max_queue_cnt)
		nvdla_queue_cleanup(&pool->queues[id]);
	mutex_unlock(&pool->queue_lock);
}
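/*
 * Usage sketch (illustrative only): a client typically creates the pool once
 * at probe time and tears it down on remove. The ops table and the queue
 * count used here are hypothetical placeholders.
 *
 *	pool = nvdla_queue_init(pdev, &example_queue_ops, 16);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *	...
 *	nvdla_queue_deinit(pool);
 */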
#ifdef CONFIG_PM
int nvdla_queue_pool_prepare_suspend(struct nvdla_queue_pool *qpool)
{
	int err = 0;
	unsigned int queue_id;

	mutex_lock(&qpool->queue_lock);

	/* For each active queue, ensure there are no outstanding tasks. */
	for_each_set_bit(queue_id, &qpool->alloc_table, qpool->max_queue_cnt) {
		struct nvdla_queue *queue = &qpool->queues[queue_id];
		struct nvdla_queue_task_pool *tpool = queue->task_pool;
		bool nvdla_queue_is_idle;

		/* Cleanup the queue before checking the idleness. */
		nvdla_queue_cleanup(queue);

		mutex_lock(&tpool->lock);
		nvdla_queue_is_idle = (tpool->alloc_table == 0ULL);
		mutex_unlock(&tpool->lock);

		if (!nvdla_queue_is_idle) {
			err = -EBUSY;
			goto fail_busy;
		}
	}

fail_busy:
	mutex_unlock(&qpool->queue_lock);
	return err;
}
#endif /* CONFIG_PM */

void nvdla_queue_abort_all(struct nvdla_queue_pool *pool)
{
	u32 id;

	mutex_lock(&pool->queue_lock);
	for_each_set_bit(id, &pool->alloc_table, pool->max_queue_cnt)
		nvdla_queue_abort(&pool->queues[id]);
	mutex_unlock(&pool->queue_lock);
}

static void nvdla_queue_release(struct kref *ref)
{
	struct nvdla_queue *queue = container_of(ref, struct nvdla_queue,
						 kref);
	struct nvdla_queue_pool *pool = queue->pool;

	nvdla_dbg_fn(pool->pdev, "%s\n", __func__);

	if (queue->use_channel)
		nvdla_putchannel(queue);

	/* release allocated resources */
	nvdla_sync_destroy(queue->sync_context);

	/* free the task_pool */
	if (queue->task_dma_size)
		nvdla_queue_task_free_pool(pool->pdev, queue);

	/* ..and mark the queue free */
	mutex_lock(&pool->queue_lock);
	clear_bit(queue->id, &pool->alloc_table);
	mutex_unlock(&pool->queue_lock);
}

void nvdla_queue_put(struct nvdla_queue *queue)
{
	nvdla_dbg_fn(queue->pool->pdev, "%s\n", __func__);
	kref_put(&queue->kref, nvdla_queue_release);
}

void nvdla_queue_get(struct nvdla_queue *queue)
{
	nvdla_dbg_fn(queue->pool->pdev, "%s\n", __func__);
	kref_get(&queue->kref);
}
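/*
 * Reference-count sketch (illustrative only): a caller that hands a queue to
 * an asynchronous context takes an extra reference first and drops it when
 * the work completes; the final nvdla_queue_put() runs nvdla_queue_release(),
 * which returns the channel, sync context, task pool and pool slot.
 *
 *	nvdla_queue_get(queue);
 *	... schedule work that uses the queue ...
 *	nvdla_queue_put(queue);
 */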
struct nvdla_queue *nvdla_queue_alloc(struct nvdla_queue_pool *pool,
				      unsigned int num_tasks,
				      bool use_channel)
{
	struct platform_device *pdev = pool->pdev;
	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
	struct nvdla_device *nvdla_dev = pdata->private_data;
	struct nvdla_queue *queues = pool->queues;
	struct nvdla_queue *queue;
	int index = 0;
	int err = 0;

	mutex_lock(&pool->queue_lock);

	index = find_first_zero_bit(&pool->alloc_table,
				    pool->max_queue_cnt);

	/* Queue not found on first attempt. */
	if (index >= pool->max_queue_cnt) {
		mutex_unlock(&pool->queue_lock);
		/* Cleanup and retry one more time before erroring out */
		nvdla_queue_cleanup_all(pool);

		mutex_lock(&pool->queue_lock);
		index = find_first_zero_bit(&pool->alloc_table,
					    pool->max_queue_cnt);
		if (index >= pool->max_queue_cnt) {
			dev_err(&pdev->dev, "failed to get free Queue\n");
			err = -ENOMEM;
			goto err_alloc_queue;
		}
	}
	spec_bar(); /* break_spec_p#1 */

	/* reserve the queue */
	queue = &queues[index];
	set_bit(index, &pool->alloc_table);

	/* allocate a sync context for the queue */
	queue->sync_context = nvdla_sync_create(nvdla_dev->sync_dev);
	if (queue->sync_context == NULL) {
		dev_err(&pdev->dev, "failed to create sync context\n");
		err = -ENOMEM;
		goto err_alloc_sync;
	}

	/* initialize queue ref count and sequence */
	kref_init(&queue->kref);
	queue->use_channel = use_channel;
	queue->sequence = 0;

	/* initialize task list */
	INIT_LIST_HEAD(&queue->tasklist);
	mutex_init(&queue->list_lock);

	/* initialize the queue attribute */
	queue->attr = NULL;
	mutex_init(&queue->attr_lock);

	mutex_unlock(&pool->queue_lock);

	/* Check if the queue should allocate a channel */
	if (use_channel) {
		queue->vm_pdev = nvdla_channel_map(pdev, queue);
		if (!queue->vm_pdev) {
			/* the mapping does not propagate an error code */
			err = -ENOMEM;
			goto err_alloc_channel;
		}
	} else {
		queue->vm_pdev = pdev;
	}

	if (queue->task_dma_size) {
		err = nvdla_queue_task_pool_alloc(queue->vm_pdev,
						  queue, num_tasks);
		if (err < 0)
			goto err_alloc_task_pool;
	}

	return queue;

err_alloc_task_pool:
	if (use_channel)
		nvdla_putchannel(queue);
err_alloc_channel:
	mutex_lock(&pool->queue_lock);
	nvdla_sync_destroy(queue->sync_context);
err_alloc_sync:
	clear_bit(queue->id, &pool->alloc_table);
err_alloc_queue:
	mutex_unlock(&pool->queue_lock);
	return ERR_PTR(err);
}

int nvdla_queue_abort(struct nvdla_queue *queue)
{
	struct nvdla_queue_pool *pool = queue->pool;

	if (pool->ops && pool->ops->abort)
		return pool->ops->abort(queue);

	return 0;
}

int nvdla_queue_submit(struct nvdla_queue *queue, void *task_arg)
{
	struct nvdla_queue_pool *pool = queue->pool;

	if (pool->ops && pool->ops->submit)
		return pool->ops->submit(queue, task_arg);

	return 0;
}

int nvdla_queue_set_attr(struct nvdla_queue *queue, void *arg)
{
	struct nvdla_queue_pool *pool = queue->pool;

	if (pool->ops && pool->ops->set_attribute)
		return pool->ops->set_attribute(queue, arg);

	return 0;
}
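/*
 * Usage sketch (illustrative only): allocate a queue backed by the shared
 * task pool and push work through the pool ops. The task count and the
 * "task_desc" argument are hypothetical; the real task descriptor type is
 * defined by the engine-specific submit callback.
 *
 *	queue = nvdla_queue_alloc(pool, 8, false);
 *	if (IS_ERR(queue))
 *		return PTR_ERR(queue);
 *
 *	err = nvdla_queue_submit(queue, task_desc);
 *	...
 *	nvdla_queue_put(queue);
 */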
struct nvdla_queue_task {
	struct platform_device *host1x_pdev;
	struct nvdla_queue *queue;

	dma_addr_t dma_addr;
	u32 *cpu_addr;
};

#if IS_ENABLED(CONFIG_TEGRA_NVDLA_CHANNEL)
static void queue_task_update(void *priv, int nr_completed)
{
	struct nvdla_queue_task *task = priv;
	struct platform_device *host1x_pdev = task->host1x_pdev;

	dma_free_coherent(&host1x_pdev->dev, CMDBUF_SIZE,
			  task->cpu_addr, task->dma_addr);
	kfree(task);
}

int nvdla_queue_submit_to_host1x(struct nvdla_queue *queue,
				 u32 *cmdbuf,
				 u32 num_cmdbuf_words,
				 u32 num_syncpt_incrs,
				 u32 *wait_syncpt_ids,
				 u32 *wait_syncpt_thresholds,
				 u32 num_syncpt_waits,
				 u32 *task_syncpt_threshold)
{
	struct nvdla_queue_pool *pool = queue->pool;
	struct platform_device *client_pdev = pool->pdev;
	struct platform_device *host1x_pdev =
			to_platform_device(client_pdev->dev.parent);
	struct nvhost_device_data *pdata = platform_get_drvdata(client_pdev);
	struct nvdla_queue_task *task;
	struct nvhost_job *job;
	unsigned int i;
	int err = 0;

	if (queue->use_channel == false)
		return -EINVAL;

	/* Allocate memory for the task and task command buffer */
	task = kzalloc(sizeof(*task), GFP_KERNEL);
	if (task == NULL) {
		dev_err(&client_pdev->dev, "failed to allocate task\n");
		err = -ENOMEM;
		goto err_alloc_task;
	}

	task->cpu_addr = dma_alloc_coherent(&host1x_pdev->dev, CMDBUF_SIZE,
					    &task->dma_addr, GFP_KERNEL);
	if (task->cpu_addr == NULL) {
		dev_err(&client_pdev->dev,
			"failed to allocate command buffer\n");
		err = -ENOMEM;
		goto err_alloc_cmdbuf;
	}

	/* Copy the command buffer */
	memcpy(task->cpu_addr, cmdbuf, num_cmdbuf_words * 4);

	job = nvhost_job_alloc(queue->channel, 1, 0, num_syncpt_waits, 1);
	if (job == NULL) {
		err = -ENOMEM;
		goto err_alloc_job;
	}

	task->queue = queue;
	task->host1x_pdev = host1x_pdev;

	/* Write waits to the job */
	job->num_waitchk = num_syncpt_waits;
	for (i = 0; i < num_syncpt_waits; i++) {
		job->waitchk[i].syncpt_id = wait_syncpt_ids[i];
		job->waitchk[i].thresh = wait_syncpt_thresholds[i];
		job->waitchk[i].mem = 0;
	}
	spec_bar(); /* break_spec_p#5_1 */

	/* Initialize syncpoint increments */
	job->sp->id = queue->syncpt_id;
	job->sp->incrs = num_syncpt_incrs;
	job->num_syncpts = 1;

	/* Add the command buffer */
	nvhost_job_add_client_gather_address(job, num_cmdbuf_words,
					     pdata->class, task->dma_addr);

	/* Submit task to hardware */
	err = nvhost_channel_submit(job);
	if (err < 0)
		goto err_submit_job;

	/* Return the number of increments back to the caller */
	*task_syncpt_threshold = job->sp->fence;

	/* Register a callback function for releasing resources */
	err = nvhost_intr_register_notifier(host1x_pdev,
					    queue->syncpt_id,
					    job->sp->fence,
					    queue_task_update, task);
	if (err < 0) {
		dev_err(&client_pdev->dev,
			"failed to register notifier err=%d", err);
		goto err_register_notifier;
	}

	/* nvhost keeps a reference on the job and we don't
	 * need to access it anymore
	 */
	nvhost_job_put(job);

	return 0;

err_register_notifier:
err_submit_job:
	nvhost_job_put(job);
err_alloc_job:
	dma_free_coherent(&host1x_pdev->dev, CMDBUF_SIZE,
			  task->cpu_addr, task->dma_addr);
err_alloc_cmdbuf:
	kfree(task);
err_alloc_task:
	return err;
}
#endif
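/*
 * Usage sketch (illustrative only): submit a small command buffer on a queue
 * that was allocated with use_channel == true, then wait on the returned
 * syncpoint threshold. The buffer contents and the absence of wait conditions
 * are hypothetical; "cmds" is assumed to hold opcodes already formatted for
 * the engine class.
 *
 *	u32 cmds[8] = { ... };
 *	u32 threshold;
 *
 *	err = nvdla_queue_submit_to_host1x(queue, cmds, ARRAY_SIZE(cmds),
 *					   1, NULL, NULL, 0, &threshold);
 *	if (err < 0)
 *		return err;
 *	... wait on (queue->syncpt_id, threshold) ...
 */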
int nvdla_queue_get_task_size(struct nvdla_queue *queue)
{
	struct nvdla_queue_pool *pool = queue->pool;

	if (pool->ops && pool->ops->get_task_size)
		pool->ops->get_task_size(&queue->task_dma_size,
					 &queue->task_kmem_size);

	return 0;
}

int nvdla_queue_alloc_task_memory(
			struct nvdla_queue *queue,
			struct nvdla_queue_task_mem_info *task_mem_info)
{
	int err = 0;
	int index, hw_offset, sw_offset;
	struct platform_device *pdev = queue->pool->pdev;
	struct nvdla_queue_task_pool *task_pool =
		(struct nvdla_queue_task_pool *)queue->task_pool;

	mutex_lock(&task_pool->lock);

	index = find_first_zero_bit(&task_pool->alloc_table,
				    task_pool->max_task_cnt);

	/* quit if pre-allocated task array is not free */
	if (index >= task_pool->max_task_cnt) {
		dev_err(&pdev->dev, "failed to get Task Pool Memory\n");
		err = -EAGAIN;
		goto err_alloc_task_mem;
	}

	/* assign the task array */
	set_bit(index, &task_pool->alloc_table);

	hw_offset = index * queue->task_dma_size;
	sw_offset = index * queue->task_kmem_size;
	task_mem_info->kmem_addr =
		(void *)((u8 *)task_pool->kmem_addr + sw_offset);
	task_mem_info->va = (void *)((u8 *)task_pool->va + hw_offset);
	task_mem_info->dma_addr = task_pool->dma_addr + hw_offset;
	task_mem_info->pool_index = index;

err_alloc_task_mem:
	mutex_unlock(&task_pool->lock);

	return err;
}

void nvdla_queue_free_task_memory(struct nvdla_queue *queue, int index)
{
	int hw_offset, sw_offset;
	u8 *task_kmem, *task_dma_va;
	struct nvdla_queue_task_pool *task_pool =
		(struct nvdla_queue_task_pool *)queue->task_pool;

	/* clear task kernel and dma virtual memory contents */
	hw_offset = index * queue->task_dma_size;
	sw_offset = index * queue->task_kmem_size;
	task_kmem = (u8 *)task_pool->kmem_addr + sw_offset;
	task_dma_va = (u8 *)task_pool->va + hw_offset;
	memset(task_kmem, 0, queue->task_kmem_size);
	memset(task_dma_va, 0, queue->task_dma_size);

	mutex_lock(&task_pool->lock);
	clear_bit(index, &task_pool->alloc_table);
	mutex_unlock(&task_pool->lock);
}
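/*
 * Usage sketch (illustrative only): reserve a per-task slot from the
 * preallocated pool, fill it in, and return the slot once the task retires.
 *
 *	struct nvdla_queue_task_mem_info mem;
 *
 *	err = nvdla_queue_alloc_task_memory(queue, &mem);
 *	if (err < 0)
 *		return err;
 *	... populate mem.kmem_addr / mem.va, hand mem.dma_addr to hardware ...
 *	nvdla_queue_free_task_memory(queue, mem.pool_index);
 */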