drivers: pva: Add support for next chip

- PVA initialization and programming sequence update for next chip.
- Update to VMEM regions and addresses.

Signed-off-by: Amruta Bhamidipati <abhamidipati@nvidia.com>
Change-Id: I25b0fae260c516d5a7521aabc994a87525555577
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2925454
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Sreehari Mohan <sreeharim@nvidia.com>
Reviewed-by: Krish Agarwal <krisha@nvidia.com>
Reviewed-by: Omar Nemri <onemri@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2999149
Tested-by: Omar Nemri <onemri@nvidia.com>
This commit is contained in:
Amruta Bhamidipati
2023-06-23 21:10:06 +00:00
committed by mobile promotions
parent aaa19719df
commit b2381d6aec
8 changed files with 116 additions and 34 deletions

View File

@@ -6,8 +6,10 @@
#ifndef _hw_vmem_pva_h_
#define _hw_vmem_pva_h_
#define NUM_HEM_GEN 2U
#define VMEM_REGION_COUNT 3U
#define NUM_HEM_GEN 3U
#define VMEM_REGION_COUNT_T19x 3U
#define VMEM_REGION_COUNT_T23x 3U
#define T19X_VMEM0_START 0x40U
#define T19X_VMEM0_END 0x10000U
#define T19X_VMEM1_START 0x40000U

View File

@@ -49,7 +49,7 @@ client_context_search_locked(struct platform_device *pdev,
c_node->pva = dev;
c_node->curr_sema_value = 0;
mutex_init(&c_node->sema_val_lock);
if (dev->version == PVA_HW_GEN2) {
if (dev->version != PVA_HW_GEN1) {
c_node->cntxt_dev =
nvpva_iommu_context_dev_allocate(NULL,
0,
@@ -70,7 +70,7 @@ client_context_search_locked(struct platform_device *pdev,
dev_err(&dev->pdev->dev,
"failed to init nvhost buffer for client:%lu",
PTR_ERR(c_node->buffers));
if (dev->version == PVA_HW_GEN2)
if (dev->version != PVA_HW_GEN1)
nvpva_iommu_context_dev_release(c_node->cntxt_dev);
c_node = NULL;
}

View File

@@ -61,6 +61,10 @@
#include "pva-fw-address-map.h"
#include "pva_sec_ec.h"
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
#include "pva_t264.h"
#endif
/*
* NO IOMMU set 0x60000000 as start address.
* With IOMMU set 0x80000000(>2GB) as start address
@@ -82,6 +86,10 @@ static u32 vm_regs_sid_idx_t234[] = {1, 2, 3, 4, 5, 6, 7, 7,
#endif
static u32 vm_regs_reg_idx_t234[] = {0, 1, 2, 3, 4, 5, 6, 7,
8, 8, 8, 9, 9, 0, 0, 0};
#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA
static u32 *vm_regs_sid_idx_t264 = vm_regs_sid_idx_t234;
static u32 *vm_regs_reg_idx_t264 = vm_regs_reg_idx_t234;
#endif
static char *aux_dev_name = "16000000.pva0:pva0_niso1_ctx7";
static u32 aux_dev_name_len = 29;
@@ -197,6 +205,12 @@ static struct of_device_id tegra_pva_of_match[] = {
.name = "pva0",
.compatible = "nvidia,tegra234-pva-hv",
.data = (struct nvhost_device_data *)&t23x_pva0_info },
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
{
.name = "pva0",
.compatible = "nvidia,tegra264-pva",
.data = (struct nvhost_device_data *)&t264_pva0_info },
#endif
{ },
};
@@ -768,9 +782,12 @@ static int nvpva_write_hwid(struct platform_device *pdev)
if (pva->version == PVA_HW_GEN1) {
id_idx = vm_regs_sid_idx_t19x;
reg_idx = vm_regs_reg_idx_t19x;
} else {
} else if (pva->version == PVA_HW_GEN2) {
id_idx = vm_regs_sid_idx_t234;
reg_idx = vm_regs_reg_idx_t234;
} else {
id_idx = vm_regs_sid_idx_t264;
reg_idx = vm_regs_reg_idx_t264;
}
/* Go through the StreamIDs and assemble register values */
@@ -1040,7 +1057,9 @@ static int pva_probe(struct platform_device *pdev)
struct pva *pva;
int err = 0;
size_t i;
#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA
u32 offset;
#endif
#if !IS_ENABLED(CONFIG_TEGRA_GRHOST)
struct kobj_attribute *attr = NULL;
@@ -1069,7 +1088,7 @@ static int pva_probe(struct platform_device *pdev)
#endif
if ((pdata->version != PVA_HW_GEN1)
&& !is_cntxt_initialized()) {
&& !is_cntxt_initialized(pdata->version)) {
dev_warn(&pdev->dev,
"nvpva cntxt was not initialized, deferring probe.");
return -EPROBE_DEFER;
@@ -1096,8 +1115,15 @@ static int pva_probe(struct platform_device *pdev)
}
/* Initialize PVA private data */
if (pdata->version == PVA_HW_GEN2) {
if (pdata->version == PVA_HW_GEN3) {
pva->version = PVA_HW_GEN3;
pdata->firmware_name = "nvpva_030.fw";
pdata->firmware_not_in_subdir = true;
pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ;
pva->version_config = &pva_t23x_config;
} else if (pdata->version == PVA_HW_GEN2) {
pva->version = PVA_HW_GEN2;
dev_info(&pdev->dev, "pdata->version is HW_GEN2");
pdata->firmware_name = "nvpva_020.fw";
pdata->firmware_not_in_subdir = true;
pva->submit_cmd_mode = PVA_SUBMIT_MODE_MMIO_CCQ;
@@ -1192,19 +1218,29 @@ static int pva_probe(struct platform_device *pdev)
goto err_client_device_init;
}
if (pdata->version != PVA_HW_GEN1) {
dev_info(dev, "Completed nvhost_client_device_init\n");
if (pdata->version == PVA_HW_GEN1) {
pva->aux_pdev = pva->pdev;
} else if (pdata->version == PVA_HW_GEN2) {
pva->aux_pdev =
nvpva_iommu_context_dev_allocate(aux_dev_name,
aux_dev_name_len,
false);
} else {
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
pva->aux_pdev =
nvpva_iommu_context_dev_allocate(aux_dev_name_t264,
aux_dev_name_len_t264,
false);
#endif
}
if (pva->aux_pdev == NULL) {
dev_err(&pva->pdev->dev,
"failed to allocate aux device");
goto err_context_alloc;
}
} else {
pva->aux_pdev = pva->pdev;
}
pva->pool = nvpva_queue_init(pdev, pva->aux_pdev, &pva_queue_ops,
MAX_PVA_QUEUE_COUNT);
@@ -1256,7 +1292,7 @@ static int pva_probe(struct platform_device *pdev)
pva->sid_count = 0;
err = nvpva_iommu_context_dev_get_sids(&pva->sids[1],
&pva->sid_count,
NVPVA_USER_VM_COUNT);
pdata->version);
if (err)
goto err_iommu_ctxt_init;
@@ -1268,6 +1304,7 @@ static int pva_probe(struct platform_device *pdev)
++(pva->sid_count);
#ifndef CONFIG_TEGRA_T26X_GRHOST_PVA
offset = hwpm_get_offset();
if ((UINT_MAX - offset) < pdev->resource[0].start) {
@@ -1282,6 +1319,7 @@ static int pva_probe(struct platform_device *pdev)
pva->hwpm_ip_ops.hwpm_ip_pm = &pva_hwpm_ip_pm;
pva->hwpm_ip_ops.hwpm_ip_reg_op = &pva_hwpm_ip_reg_op;
tegra_soc_hwpm_ip_register(&pva->hwpm_ip_ops);
#endif
#if !IS_ENABLED(CONFIG_TEGRA_GRHOST)
if (pdata->num_clks > 0) {

View File

@@ -54,8 +54,6 @@ struct pva_version_info {
#define MAX_PVA_TASK_COUNT_PER_QUEUE_SEG \
(MAX_PVA_TASK_COUNT_PER_QUEUE/MAX_PVA_SEG_COUNT_PER_QUEUE)
#define NVPVA_USER_VM_COUNT MAX_PVA_CLIENTS
/**
* Maximum task count that a PVA engine can support
*/
@@ -254,6 +252,7 @@ struct pva_status_interface_registers {
#define PVA_HW_GEN1 1
#define PVA_HW_GEN2 2
#define PVA_HW_GEN3 3
/**
* @brief HW version specific configuration and functions

View File

@@ -16,10 +16,23 @@
#include <linux/of.h>
#include <linux/version.h>
#include <linux/dma-buf.h>
#include <linux/nvhost.h>
#include <linux/platform_device.h>
#include "pva_iommu_context_dev.h"
#include "pva.h"
#define NVPVA_CNTXT_DEV_NAME_LEN_T23X (29U)
#define NVPVA_CNTXT_DEVICE_CNT (8U)
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
#include "pva_cntxt_dev_name_t264.h"
#include "pva_iommu_context_dev_t264.h"
#else
#define NVPVA_CNTXT_DEV_NAME_LEN NVPVA_CNTXT_DEV_NAME_LEN_T23X
#define NVPVA_CNTXT_DEVICE_CNT_T264 NVPVA_CNTXT_DEVICE_CNT
#endif
static u32 cntxt_dev_count;
static char *dev_names[] = {
"pva0_niso1_ctx0",
@@ -30,6 +43,9 @@ static char *dev_names[] = {
"pva0_niso1_ctx5",
"pva0_niso1_ctx6",
"pva0_niso1_ctx7",
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
PVA_CNTXT_DEV_NAME_T264
#endif
};
static const struct of_device_id pva_iommu_context_dev_of_match[] = {
@@ -49,22 +65,31 @@ struct pva_iommu_ctx {
static LIST_HEAD(pva_iommu_ctx_list);
static DEFINE_MUTEX(pva_iommu_ctx_list_mutex);
bool is_cntxt_initialized(void)
bool is_cntxt_initialized(const int hw_gen)
{
return (cntxt_dev_count == 8);
u32 pva_cntxt_dev_cnt = (hw_gen == PVA_HW_GEN3) ? NVPVA_CNTXT_DEVICE_CNT_T264
: NVPVA_CNTXT_DEVICE_CNT;
return (cntxt_dev_count == pva_cntxt_dev_cnt);
}
int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt)
int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, const int hw_gen)
{
struct pva_iommu_ctx *ctx;
int err = 0;
int i;
u32 pva_cntxt_dev_cnt;
if (hw_gen == PVA_HW_GEN3)
pva_cntxt_dev_cnt = NVPVA_CNTXT_DEVICE_CNT_T264;
else
pva_cntxt_dev_cnt = NVPVA_CNTXT_DEVICE_CNT;
*count = 0;
mutex_lock(&pva_iommu_ctx_list_mutex);
for (i = 0; i < max_cnt; i++) {
for (i = 0; i < pva_cntxt_dev_cnt; i++) {
list_for_each_entry(ctx, &pva_iommu_ctx_list, list) {
if (strnstr(ctx->pdev->name, dev_names[i], 29) != NULL) {
if (strnstr(ctx->pdev->name, dev_names[i],
NVPVA_CNTXT_DEV_NAME_LEN) != NULL) {
hwids[*count] = nvpva_get_device_hwid(ctx->pdev, 0);
if (hwids[*count] < 0) {
err = hwids[*count];
@@ -72,7 +97,7 @@ int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt)
}
++(*count);
if (*count >= max_cnt)
if (*count >= pva_cntxt_dev_cnt)
break;
}
}
@@ -167,7 +192,7 @@ static int pva_iommu_context_dev_probe(struct platform_device *pdev)
return -ENOMEM;
}
if (strnstr(pdev->name, dev_names[7], 29) != NULL)
if (strnstr(pdev->name, dev_names[7], NVPVA_CNTXT_DEV_NAME_LEN) != NULL)
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
else
dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(39));

View File

@@ -11,7 +11,7 @@
struct platform_device
*nvpva_iommu_context_dev_allocate(char *identifier, size_t len, bool shared);
void nvpva_iommu_context_dev_release(struct platform_device *pdev);
int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, int max_cnt);
bool is_cntxt_initialized(void);
int nvpva_iommu_context_dev_get_sids(int *hwids, int *count, const int hw_gen);
bool is_cntxt_initialized(const int hw_gen);
#endif

View File

@@ -198,10 +198,14 @@ pva_task_pin_fence(struct pva_submit_task *task,
if (IS_ERR(mem)) {
task_err(task, "sempahore submit pin failed");
err = PTR_ERR(mem);
} else
} else {
*addr = mem->dma_addr + fence->obj.sem.mem.offset;
*serial_id = mem->serial_id;
nvpva_dbg_info(task->pva,
"id = %d, semaphore addr = %llx",
fence->obj.sem.mem.pin_id, *addr);
}
break;
}
case NVPVA_FENCE_OBJ_SYNCPT: {
@@ -1257,7 +1261,7 @@ set_task_parameters(const struct pva_submit_tasks *task_header)
* thus the response should come in the correct CCQ
*/
if ((task->pva->submit_task_mode == PVA_SUBMIT_MODE_MMIO_CCQ)
&& (task_header->tasks[0]->pva->version == PVA_HW_GEN2))
&& (task_header->tasks[0]->pva->version != PVA_HW_GEN1))
status_interface = (task->queue->id + 1U);
for (idx = 0U; idx < task_header->num_tasks; idx++) {

View File

@@ -12,6 +12,12 @@
#include "hw_vmem_pva.h"
#include "pva_vpu_exe.h"
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
#include "hw_vmem_pva_t264.h"
#else
#define VMEM_REGION_COUNT_T26x 4
#endif
#define ELF_MAXIMUM_SECTION_NAME 64
#define ELF_EXPORTS_SECTION "EXPORTS"
#define DATA_SECTION_ALIGNMENT 32
@@ -1123,16 +1129,22 @@ struct vmem_region {
uint32_t end;
};
struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT] = {
struct vmem_region vmem_regions_tab[NUM_HEM_GEN + 1][VMEM_REGION_COUNT_T26x] = {
{{.start = 0, .end = 0},
{.start = 0, .end = 0},
{.start = 0, .end = 0},
{.start = 0, .end = 0}},
{{.start = T19X_VMEM0_START, .end = T19X_VMEM0_END},
{.start = T19X_VMEM1_START, .end = T19X_VMEM1_END},
{.start = T19X_VMEM2_START, .end = T19X_VMEM2_END}},
{.start = T19X_VMEM2_START, .end = T19X_VMEM2_END},
{.start = 0xFFFFFFFF, .end = 0}},
{{.start = T23x_VMEM0_START, .end = T23x_VMEM0_END},
{.start = T23x_VMEM1_START, .end = T23x_VMEM1_END},
{.start = T23x_VMEM2_START, .end = T23x_VMEM2_END}},
{.start = T23x_VMEM2_START, .end = T23x_VMEM2_END},
{.start = 0xFFFFFFFF, .end = 0}},
#ifdef CONFIG_TEGRA_T26X_GRHOST_PVA
#include "pva_vmem_regions_tab_t264.h"
#endif
};
int32_t
@@ -1143,13 +1155,15 @@ nvpva_validate_vmem_offset(const uint32_t vmem_offset,
int i;
int32_t err = -EINVAL;
const u8 vmem_region_count = (hw_gen == PVA_HW_GEN3) ? VMEM_REGION_COUNT_T26x
: VMEM_REGION_COUNT_T23x;
if (hw_gen < 0 || hw_gen > NUM_HEM_GEN) {
pr_err("invalid hw_gen index: %d", hw_gen);
return err;
}
for (i = VMEM_REGION_COUNT; i > 0; i--) {
for (i = vmem_region_count; i > 0; i--) {
if (vmem_offset >= vmem_regions_tab[hw_gen][i-1].start)
break;
}