From b2381d6aec7580f08b81e332ef32c80c90499c4f Mon Sep 17 00:00:00 2001 From: Amruta Bhamidipati Date: Fri, 23 Jun 2023 21:10:06 +0000 Subject: [PATCH] drivers: pva: Add support for next chip - PVA initialization and programming sequence update for next chip. - Update to VMEM regions and addresses. Signed-off-by: Amruta Bhamidipati Change-Id: I25b0fae260c516d5a7521aabc994a87525555577 Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2925454 Reviewed-by: svc_kernel_abi Reviewed-by: Sreehari Mohan Reviewed-by: Krish Agarwal Reviewed-by: Omar Nemri GVS: Gerrit_Virtual_Submit Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2999149 Tested-by: Omar Nemri --- drivers/video/tegra/host/pva/hw_vmem_pva.h | 6 +- drivers/video/tegra/host/pva/nvpva_client.c | 4 +- drivers/video/tegra/host/pva/pva.c | 60 +++++++++++++++---- drivers/video/tegra/host/pva/pva.h | 3 +- .../tegra/host/pva/pva_iommu_context_dev.c | 39 +++++++++--- .../tegra/host/pva/pva_iommu_context_dev.h | 4 +- drivers/video/tegra/host/pva/pva_queue.c | 10 +++- drivers/video/tegra/host/pva/pva_vpu_exe.c | 24 ++++++-- 8 files changed, 116 insertions(+), 34 deletions(-) diff --git a/drivers/video/tegra/host/pva/hw_vmem_pva.h b/drivers/video/tegra/host/pva/hw_vmem_pva.h index 1c3fefac..cc4d2f94 100644 --- a/drivers/video/tegra/host/pva/hw_vmem_pva.h +++ b/drivers/video/tegra/host/pva/hw_vmem_pva.h @@ -6,8 +6,10 @@ #ifndef _hw_vmem_pva_h_ #define _hw_vmem_pva_h_ -#define NUM_HEM_GEN 2U -#define VMEM_REGION_COUNT 3U +#define NUM_HEM_GEN 3U +#define VMEM_REGION_COUNT_T19x 3U +#define VMEM_REGION_COUNT_T23x 3U + #define T19X_VMEM0_START 0x40U #define T19X_VMEM0_END 0x10000U #define T19X_VMEM1_START 0x40000U diff --git a/drivers/video/tegra/host/pva/nvpva_client.c b/drivers/video/tegra/host/pva/nvpva_client.c index 7ad0df75..b8731063 100644 --- a/drivers/video/tegra/host/pva/nvpva_client.c +++ b/drivers/video/tegra/host/pva/nvpva_client.c @@ -49,7 +49,7 @@ client_context_search_locked(struct platform_device *pdev, c_node->pva = dev; c_node->curr_sema_value = 0; mutex_init(&c_node->sema_val_lock); - if (dev->version == PVA_HW_GEN2) { + if (dev->version != PVA_HW_GEN1) { c_node->cntxt_dev = nvpva_iommu_context_dev_allocate(NULL, 0, @@ -70,7 +70,7 @@ client_context_search_locked(struct platform_device *pdev, dev_err(&dev->pdev->dev, "failed to init nvhost buffer for client:%lu", PTR_ERR(c_node->buffers)); - if (dev->version == PVA_HW_GEN2) + if (dev->version != PVA_HW_GEN1) nvpva_iommu_context_dev_release(c_node->cntxt_dev); c_node = NULL; } diff --git a/drivers/video/tegra/host/pva/pva.c b/drivers/video/tegra/host/pva/pva.c index 50a4c871..f02eef1a 100644 --- a/drivers/video/tegra/host/pva/pva.c +++ b/drivers/video/tegra/host/pva/pva.c @@ -61,6 +61,10 @@ #include "pva-fw-address-map.h" #include "pva_sec_ec.h" +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA +#include "pva_t264.h" +#endif + /* * NO IOMMU set 0x60000000 as start address. * With IOMMU set 0x80000000(>2GB) as startaddress @@ -82,6 +86,10 @@ static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7, #endif static u32 vm_regs_reg_idx_t234[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 9, 9, 0, 0, 0}; +#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA +static u32 *vm_regs_sid_idx_t264 = vm_regs_sid_idx_t234; +static u32 *vm_regs_reg_idx_t264 = vm_regs_reg_idx_t234; +#endif static char *aux_dev_name = "16000000.pva0:pva0_niso1_ctx7"; static u32 aux_dev_name_len = 29; @@ -197,6 +205,12 @@ static struct of_device_id tegra_pva_of_match[] = { .name = "pva0", .compatible = "nvidia,tegra234-pva-hv", .data = (struct nvhost_device_data *)&t23x_pva0_info }, +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA + { + .name = "pva0", + .compatible = "nvidia,tegra264-pva", + .data = (struct nvhost_device_data *)&t264_pva0_info }, +#endif { }, }; @@ -768,9 +782,12 @@ static int nvpva_write_hwid(struct platform_device *pdev) if (pva->version == PVA_HW_GEN1) { id_idx = vm_regs_sid_idx_t19x; reg_idx = vm_regs_reg_idx_t19x; - } else { + } else if (pva->version == PVA_HW_GEN2) { id_idx = vm_regs_sid_idx_t234; reg_idx = vm_regs_reg_idx_t234; + } else { + id_idx = vm_regs_sid_idx_t264; + reg_idx = vm_regs_reg_idx_t264; } /* Go through the StreamIDs and assemble register values */ @@ -1040,7 +1057,9 @@ static int pva_probe(struct platform_device *pdev) struct pva *pva; int err = 0; size_t i; +#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA u32 offset; +#endif #if !IS_ENABLED(CONFIG_TEGRA_GRHOST) struct kobj_attribute *attr = NULL; @@ -1069,7 +1088,7 @@ static int pva_probe(struct platform_device *pdev) #endif if ((pdata->version != PVA_HW_GEN1) - && !is_cntxt_initialized()) { + && !is_cntxt_initialized(pdata->version)) { dev_warn(&pdev->dev, "nvpva cntxt was not initialized, deferring probe."); return -EPROBE_DEFER; @@ -1096,8 +1115,15 @@ static int pva_probe(struct platform_device *pdev) } /* Initialize PVA private data */ - if (pdata->version == PVA_HW_GEN2) { + if (pdata->version == PVA_HW_GEN3) { + pva->version = PVA_HW_GEN3; + pdata->firmware_name = "nvpva_030.fw"; + pdata->firmware_not_in_subdir = true; + pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ; + pva->version_config = &pva_t23x_config; + } else if (pdata->version == PVA_HW_GEN2) { pva->version = PVA_HW_GEN2; + dev_info(&pdev->dev, "pdata->version is HW_GEN2"); pdata->firmware_name = "nvpva_020.fw"; pdata->firmware_not_in_subdir = true; pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ; @@ -1192,18 +1218,28 @@ static int pva_probe(struct platform_device *pdev) goto err_client_device_init; } - if (pdata->version != PVA_HW_GEN1) { + dev_info(dev, "Completed nvhost_client_device_init\n"); + + if (pdata->version == PVA_HW_GEN1) { + pva->aux_pdev = pva->pdev; + } else if (pdata->version == PVA_HW_GEN2) { pva->aux_pdev = nvpva_iommu_context_dev_allocate(aux_dev_name, aux_dev_name_len, false); - if (pva->aux_pdev == NULL) { - dev_err(&pva->pdev->dev, - "failed to allocate aux device"); - goto err_context_alloc; - } } else { - pva->aux_pdev = pva->pdev; +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA + pva->aux_pdev = + nvpva_iommu_context_dev_allocate(aux_dev_name_t264, + aux_dev_name_len_t264, + false); +#endif + } + + if (pva->aux_pdev == NULL) { + dev_err(&pva->pdev->dev, + "failed to allocate aux device"); + goto err_context_alloc; } pva->pool = nvpva_queue_init(pdev, pva->aux_pdev, &pva_queue_ops, @@ -1256,7 +1292,7 @@ static int pva_probe(struct platform_device *pdev) pva->sid_count = 0; err = nvpva_iommu_context_dev_get_sids(&pva->sids[1], &pva->sid_count, - NVPVA_USER_VM_COUNT); + pdata->version); if (err) goto err_iommu_ctxt_init; @@ -1268,6 +1304,7 @@ static int pva_probe(struct platform_device *pdev) ++(pva->sid_count); +#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA offset = hwpm_get_offset(); if ((UINT_MAX - offset) < pdev->resource[0].start) { @@ -1282,6 +1319,7 @@ static int pva_probe(struct platform_device *pdev) pva->hwpm_ip_ops.hwpm_ip_pm = &pva_hwpm_ip_pm; pva->hwpm_ip_ops.hwpm_ip_reg_op = &pva_hwpm_ip_reg_op; tegra_soc_hwpm_ip_register(&pva->hwpm_ip_ops); +#endif #if !IS_ENABLED(CONFIG_TEGRA_GRHOST) if (pdata->num_clks > 0) { diff --git a/drivers/video/tegra/host/pva/pva.h b/drivers/video/tegra/host/pva/pva.h index 6e9632d5..e6fb60be 100644 --- a/drivers/video/tegra/host/pva/pva.h +++ b/drivers/video/tegra/host/pva/pva.h @@ -54,8 +54,6 @@ struct pva_version_info { #define MAX_PVA_TASK_COUNT_PER_QUEUE_SEG \ (MAX_PVA_TASK_COUNT_PER_QUEUE/MAX_PVA_SEG_COUNT_PER_QUEUE) -#define NVPVA_USER_VM_COUNT MAX_PVA_CLIENTS - /** * Maximum task count that a PVA engine can support */ @@ -254,6 +252,7 @@ struct pva_status_interface_registers { #define PVA_HW_GEN1 1 #define PVA_HW_GEN2 2 +#define PVA_HW_GEN3 3 /** * @brief HW version specific configuration and functions diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.c b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c index 99348d39..45fa3467 100644 --- a/drivers/video/tegra/host/pva/pva_iommu_context_dev.c +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.c @@ -16,10 +16,23 @@ #include #include #include +#include +#include #include "pva_iommu_context_dev.h" #include "pva.h" +#define NVPVA_CNTXT_DEV_NAME_LEN_T23X (29U) +#define NVPVA_CNTXT_DEVICE_CNT (8U) + +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA +#include "pva_cntxt_dev_name_t264.h" +#include "pva_iommu_context_dev_t264.h" +#else +#define NVPVA_CNTXT_DEV_NAME_LEN NVPVA_CNTXT_DEV_NAME_LEN_T23X +#define NVPVA_CNTXT_DEVICE_CNT_T264 NVPVA_CNTXT_DEVICE_CNT +#endif + static u32 cntxt_dev_count; static char *dev_names[] = { "pva0_niso1_ctx0", @@ -30,6 +43,9 @@ static char *dev_names[] = { "pva0_niso1_ctx5", "pva0_niso1_ctx6", "pva0_niso1_ctx7", +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA + PVA_CNTXT_DEV_NAME_T264 +#endif }; static const struct of_device_id pva_iommu_context_dev_of_match[] = { @@ -49,22 +65,31 @@ struct pva_iommu_ctx { static LIST_HEAD(pva_iommu_ctx_list); static DEFINE_MUTEX(pva_iommu_ctx_list_mutex); -bool is_cntxt_initialized(void) +bool is_cntxt_initialized(const int hw_gen) { - return (cntxt_dev_count == 8); + u32 pva_cntxt_dev_cnt = (hw_gen == PVA_HW_GEN3) ? NVPVA_CNTXT_DEVICE_CNT_T264 + : NVPVA_CNTXT_DEVICE_CNT; + return (cntxt_dev_count == pva_cntxt_dev_cnt); } -int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt) +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, const int hw_gen) { struct pva_iommu_ctx *ctx; int err = 0; int i; + u32 pva_cntxt_dev_cnt; + + if (hw_gen == PVA_HW_GEN3) + pva_cntxt_dev_cnt = NVPVA_CNTXT_DEVICE_CNT_T264; + else + pva_cntxt_dev_cnt = NVPVA_CNTXT_DEVICE_CNT; *count = 0; mutex_lock(&pva_iommu_ctx_list_mutex); - for (i = 0; i < max_cnt; i++) { + for (i = 0; i < pva_cntxt_dev_cnt; i++) { list_for_each_entry(ctx, &pva_iommu_ctx_list, list) { - if (strnstr(ctx->pdev->name, dev_names[i], 29) != NULL) { + if (strnstr(ctx->pdev->name, dev_names[i], + NVPVA_CNTXT_DEV_NAME_LEN) != NULL) { hwids[*count] = nvpva_get_device_hwid(ctx->pdev, 0); if (hwids[*count] < 0) { err = hwids[*count]; @@ -72,7 +97,7 @@ int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt) } ++(*count); - if (*count >= max_cnt) + if (*count >= pva_cntxt_dev_cnt) break; } } @@ -167,7 +192,7 @@ static int pva_iommu_context_dev_probe(struct platform_device *pdev) return -ENOMEM; } - if (strnstr(pdev->name, dev_names[7], 29) != NULL) + if (strnstr(pdev->name, dev_names[7], NVPVA_CNTXT_DEV_NAME_LEN) != NULL) dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); else dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39)); diff --git a/drivers/video/tegra/host/pva/pva_iommu_context_dev.h b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h index ed8d03c8..40cc9ee1 100644 --- a/drivers/video/tegra/host/pva/pva_iommu_context_dev.h +++ b/drivers/video/tegra/host/pva/pva_iommu_context_dev.h @@ -11,7 +11,7 @@ struct platform_device *nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared); void nvpva_iommu_context_dev_release(struct platform_device *pdev); -int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt); -bool is_cntxt_initialized(void); +int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, const int hw_gen); +bool is_cntxt_initialized(const int hw_gen); #endif diff --git a/drivers/video/tegra/host/pva/pva_queue.c b/drivers/video/tegra/host/pva/pva_queue.c index f79fa2ca..36cb9e6d 100644 --- a/drivers/video/tegra/host/pva/pva_queue.c +++ b/drivers/video/tegra/host/pva/pva_queue.c @@ -198,10 +198,14 @@ pva_task_pin_fence(struct pva_submit_task *task, if (IS_ERR(mem)) { task_err(task, "sempahore submit pin failed"); err = PTR_ERR(mem); - } else + } else { *addr = mem->dma_addr + fence->obj.sem.mem.offset; - *serial_id = mem->serial_id; + *serial_id = mem->serial_id; + nvpva_dbg_info(task->pva, + "id = %d, semaphore addr = %llx", + fence->obj.sem.mem.pin_id, *addr); + } break; } case NVPVA_FENCE_OBJ_SYNCPT: { @@ -1257,7 +1261,7 @@ set_task_parameters(const struct pva_submit_tasks *task_header) * thus the response should come in the correct CCQ */ if ((task->pva->submit_task_mode == PVA_SUBMIT_MODE_MMIO_CCQ) - && (task_header->tasks[0]->pva->version == PVA_HW_GEN2)) + && (task_header->tasks[0]->pva->version != PVA_HW_GEN1)) status_interface = (task->queue->id + 1U); for (idx = 0U; idx < task_header->num_tasks; idx++) { diff --git a/drivers/video/tegra/host/pva/pva_vpu_exe.c b/drivers/video/tegra/host/pva/pva_vpu_exe.c index ea2f65c0..fe51c4de 100644 --- a/drivers/video/tegra/host/pva/pva_vpu_exe.c +++ b/drivers/video/tegra/host/pva/pva_vpu_exe.c @@ -9,9 +9,15 @@ #include "nvpva_elf_parser.h" #include "pva_bit_helpers.h" #include "pva.h" -#include "hw_vmem_pva.h" +#include "hw_vmem_pva.h" #include "pva_vpu_exe.h" +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA +#include "hw_vmem_pva_t264.h" +#else +#define VMEM_REGION_COUNT_T26x 4 +#endif + #define ELF_MAXIMUM_SECTION_NAME 64 #define ELF_EXPORTS_SECTION "EXPORTS" #define DATA_SECTION_ALIGNMENT 32 @@ -1123,16 +1129,22 @@ struct vmem_region { uint32_t end; }; -struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT] = { +struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT_T26x] = { {{.start = 0, .end = 0}, + {.start = 0, .end = 0}, {.start = 0, .end = 0}, {.start = 0, .end = 0}}, {{.start = T19X_VMEM0_START, .end = T19X_VMEM0_END}, {.start = T19X_VMEM1_START, .end = T19X_VMEM1_END}, - {.start = T19X_VMEM2_START, .end = T19X_VMEM2_END}}, + {.start = T19X_VMEM2_START, .end = T19X_VMEM2_END}, + {.start = 0xFFFFFFFF, .end = 0}}, {{.start = T23x_VMEM0_START, .end = T23x_VMEM0_END}, {.start = T23x_VMEM1_START, .end = T23x_VMEM1_END}, - {.start = T23x_VMEM2_START, .end = T23x_VMEM2_END}}, + {.start = T23x_VMEM2_START, .end = T23x_VMEM2_END}, + {.start = 0xFFFFFFFF, .end = 0}}, +#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA + #include "pva_vmem_regions_tab_t264.h" +#endif }; int32_t @@ -1143,13 +1155,15 @@ nvpva_validate_vmem_offset(const uint32_t vmem_offset, int i; int32_t err = -EINVAL; + const u8 vmem_region_count = (hw_gen == PVA_HW_GEN3) ? VMEM_REGION_COUNT_T26x + : VMEM_REGION_COUNT_T23x; if (hw_gen < 0 || hw_gen > NUM_HEM_GEN) { pr_err("invalid hw_gen index: %d", hw_gen); return err; } - for (i = VMEM_REGION_COUNT; i > 0; i--) { + for (i = vmem_region_count; i > 0; i--) { if (vmem_offset >= vmem_regions_tab[hw_gen][i-1].start) break; }