From 7fb397b0b37d904c08e63cc8026c6ea058662058 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Mon, 8 Apr 2019 14:58:54 -0700 Subject: [PATCH] gpu: nvgpu: add format_gpfifo_entry HAL for pbdma Removed dependency on pbdma hw headers in ce2, cde and submit. Added the following HAL to format gpfifo entries: - pbdma.format_gpfifo_entry Jira NVGPU-1992 Jira NVGPU-1990 Change-Id: I322d6bcd832b0ea5bbe2c2871b8f96b2793d8a65 Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2093502 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/ce2.c | 39 +++++++++---------- drivers/gpu/nvgpu/common/fifo/submit.c | 16 ++++---- .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 1 + .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 1 + drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.c | 9 +++++ drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.h | 5 +++ drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 2 + drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 2 + drivers/gpu/nvgpu/hal/init/hal_gv100.c | 2 + drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 2 + drivers/gpu/nvgpu/hal/init/hal_tu104.c | 2 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 3 ++ drivers/gpu/nvgpu/os/linux/cde.c | 13 ++----- 13 files changed, 58 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/nvgpu/common/ce2.c b/drivers/gpu/nvgpu/common/ce2.c index 00ae7f404..8245fd931 100644 --- a/drivers/gpu/nvgpu/common/ce2.c +++ b/drivers/gpu/nvgpu/common/ce2.c @@ -26,8 +26,6 @@ #include #include -#include - #include "gk20a/ce2_gk20a.h" static inline u32 gk20a_get_valid_launch_flags(struct gk20a *g, u32 launch_flags) @@ -58,7 +56,7 @@ int gk20a_ce_execute_ops(struct gk20a *g, bool found = false; u32 *cmd_buf_cpu_va; u64 cmd_buf_gpu_va = 0; - u32 methodSize; + u32 method_size; u32 cmd_buf_read_offset; u32 dma_copy_class; struct nvgpu_gpfifo_entry gpfifo; @@ -115,28 +113,29 @@ int gk20a_ce_execute_ops(struct gk20a *g, } } - cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset *sizeof(u32))); + 
cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + + (u64)(cmd_buf_read_offset *sizeof(u32))); dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - methodSize = gk20a_ce_prepare_submit(src_buf, - dst_buf, - size, - &cmd_buf_cpu_va[cmd_buf_read_offset], - NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, - payload, - gk20a_get_valid_launch_flags(g, launch_flags), - request_operation, - dma_copy_class); + method_size = gk20a_ce_prepare_submit(src_buf, + dst_buf, + size, + &cmd_buf_cpu_va[cmd_buf_read_offset], + NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, + payload, + gk20a_get_valid_launch_flags(g, launch_flags), + request_operation, + dma_copy_class); - if (methodSize != 0U) { + if (method_size != 0U) { /* store the element into gpfifo */ - gpfifo.entry0 = - u64_lo32(cmd_buf_gpu_va); - gpfifo.entry1 = - (u64_hi32(cmd_buf_gpu_va) | - pbdma_gp_entry1_length_f(methodSize)); + g->ops.pbdma.format_gpfifo_entry(g, &gpfifo, + cmd_buf_gpu_va, method_size); - /* take always the postfence as it is needed for protecting the ce context */ + /* + * always take the postfence, as it is needed to protect the + * ce context + */ submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; nvgpu_smp_wmb(); diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index f9aea83cd..dcd82f515 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -32,8 +32,6 @@ #include -#include - /* * Handle the submit synchronization - pre-fences and post-fences. 
*/ @@ -189,14 +187,14 @@ static void nvgpu_submit_append_priv_cmdbuf(struct channel_gk20a *c, { struct gk20a *g = c->g; struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem; - struct nvgpu_gpfifo_entry x = { - .entry0 = u64_lo32(cmd->gva), - .entry1 = u64_hi32(cmd->gva) | - pbdma_gp_entry1_length_f(cmd->size) - }; + struct nvgpu_gpfifo_entry gpfifo_entry; - nvgpu_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * (u32)sizeof(x), - &x, (u32)sizeof(x)); + g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, + cmd->gva, cmd->size); + + nvgpu_mem_wr_n(g, gpfifo_mem, + c->gpfifo.put * (u32)sizeof(gpfifo_entry), + &gpfifo_entry, (u32)sizeof(gpfifo_entry)); if (cmd->mem->aperture == APERTURE_SYSMEM) { trace_gk20a_push_cmdbuf(g->name, 0, cmd->size, 0, diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index fab545e96..f526d706b 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -456,6 +456,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .device_fatal_0_intr_descs = NULL, .channel_fatal_0_intr_descs = NULL, .restartable_0_intr_descs = NULL, + .format_gpfifo_entry = gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index f79a7f00a..21bb15d4e 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -541,6 +541,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .device_fatal_0_intr_descs = NULL, .channel_fatal_0_intr_descs = NULL, .restartable_0_intr_descs = NULL, + .format_gpfifo_entry = gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.c b/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.c index ecdbc6a63..b04a04a0a 100644 --- 
a/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.c +++ b/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.c @@ -363,6 +363,15 @@ u32 gm20b_pbdma_read_data(struct gk20a *g, u32 pbdma_id) return nvgpu_readl(g, pbdma_hdr_shadow_r(pbdma_id)); } +void gm20b_pbdma_format_gpfifo_entry(struct gk20a *g, + struct nvgpu_gpfifo_entry *gpfifo_entry, + u64 pb_gpu_va, u32 method_size) +{ + gpfifo_entry->entry0 = u64_lo32(pb_gpu_va); + gpfifo_entry->entry1 = u64_hi32(pb_gpu_va) | + pbdma_gp_entry1_length_f(method_size); +} + u32 gm20b_pbdma_device_fatal_0_intr_descs(void) { /* diff --git a/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.h b/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.h index f6d419ad5..547aa42f3 100644 --- a/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.h +++ b/drivers/gpu/nvgpu/hal/fifo/pbdma_gm20b.h @@ -28,6 +28,7 @@ struct gk20a; struct gk20a_debug_output; struct nvgpu_channel_dump_info; +struct nvgpu_gpfifo_entry; void gm20b_pbdma_intr_enable(struct gk20a *g, bool enable); @@ -45,6 +46,10 @@ void gm20b_pbdma_reset_method(struct gk20a *g, u32 pbdma_id, u32 gm20b_pbdma_acquire_val(u64 timeout); void gm20b_pbdma_dump_status(struct gk20a *g, struct gk20a_debug_output *o); +void gm20b_pbdma_format_gpfifo_entry(struct gk20a *g, + struct nvgpu_gpfifo_entry *gpfifo_entry, + u64 pb_gpu_va, u32 method_size); + u32 gm20b_pbdma_device_fatal_0_intr_descs(void); u32 gm20b_pbdma_channel_fatal_0_intr_descs(void); u32 gm20b_pbdma_restartable_0_intr_descs(void); diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index f0da6e625..27be7423b 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -687,6 +687,8 @@ static const struct gpu_ops gm20b_ops = { .restartable_0_intr_descs = gm20b_pbdma_restartable_0_intr_descs, .find_for_runlist = nvgpu_pbdma_find_for_runlist, + .format_gpfifo_entry = + gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c 
b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 2dea47bff..1ab1b585e 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -780,6 +780,8 @@ static const struct gpu_ops gp10b_ops = { .restartable_0_intr_descs = gm20b_pbdma_restartable_0_intr_descs, .find_for_runlist = nvgpu_pbdma_find_for_runlist, + .format_gpfifo_entry = + gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c index eb92cd1bc..d53142b45 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c @@ -951,6 +951,8 @@ static const struct gpu_ops gv100_ops = { .restartable_0_intr_descs = gm20b_pbdma_restartable_0_intr_descs, .find_for_runlist = nvgpu_pbdma_find_for_runlist, + .format_gpfifo_entry = + gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 861d32279..195cff1e0 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -924,6 +924,8 @@ static const struct gpu_ops gv11b_ops = { .restartable_0_intr_descs = gm20b_pbdma_restartable_0_intr_descs, .find_for_runlist = nvgpu_pbdma_find_for_runlist, + .format_gpfifo_entry = + gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index bfc933569..8e284c68e 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -985,6 +985,8 @@ static const struct gpu_ops tu104_ops = { .restartable_0_intr_descs = gm20b_pbdma_restartable_0_intr_descs, .find_for_runlist = nvgpu_pbdma_find_for_runlist, + .format_gpfifo_entry = + gm20b_pbdma_format_gpfifo_entry, }, .sync = { #ifdef CONFIG_TEGRA_GK20A_NVHOST diff --git 
a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index c2cb6c2a7..1061eb350 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1106,6 +1106,9 @@ struct gpu_ops { u32 (*restartable_0_intr_descs)(void); bool (*find_for_runlist)(struct gk20a *g, u32 runlist_id, u32 *pbdma_id); + void (*format_gpfifo_entry)(struct gk20a *g, + struct nvgpu_gpfifo_entry *gpfifo_entry, + u64 pb_gpu_va, u32 method_size); } pbdma; struct { diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index c9f4dc928..37774b4b7 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -53,8 +53,6 @@ #include "cde_gm20b.h" #include "cde_gp10b.h" -#include - static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l); @@ -617,14 +615,9 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx, } /* store the element into gpfifo */ - gpfifo_elem->entry0 = - u64_lo32(target_mem->gpu_va + - cmd_elem->target_byte_offset); - gpfifo_elem->entry1 = - u64_hi32(target_mem->gpu_va + - cmd_elem->target_byte_offset) | - pbdma_gp_entry1_length_f(cmd_elem->num_bytes / - sizeof(u32)); + g->ops.pbdma.format_gpfifo_entry(g, gpfifo_elem, + target_mem->gpu_va + cmd_elem->target_byte_offset, + cmd_elem->num_bytes / sizeof(u32)); } *num_entries = num_elems;