From 1160f083d4bb3e40de9cd80d331778710ce195aa Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Mon, 8 Apr 2019 16:00:45 -0700 Subject: [PATCH] gpu: nvgpu: move ce code to common/ce and hal/ce Merged gk20a_ce_delete_context and gk20a_ce_delete_context_priv. Renamed - gk20a_init_ce_support -> nvgpu_ce_init_support - gk20a_ce_destroy -> nvgpu_ce_destroy - gk20a_ce_suspend -> nvgpu_ce_suspend - gk20a_ce_create_context -> nvgpu_ce_create_context - gk20a_ce_delete_context -> nvgpu_ce_delete_context - gk20a_ce_execute_ops -> nvgpu_ce_execute_ops - gk20a_ce_prepare_submit -> nvgpu_ce_prepare_submit - gk20a_ce_put_fences -> nvgpu_ce_put_fences - gk20a_ce_delete_gpu_context -> nvgpu_ce_delete_gpu_context_locked - gk20a_ce_get_method_size -> nvgpu_ce_get_method_size - gk20a_gpu_ctx -> nvgpu_ce_gpu_ctx - gk20a_gpu_ctx_from_list -> nvgpu_ce_gpu_ctx_from_list - gk20a_ce_app -> nvgpu_ce_app - gk20a_ce_debugfs_init -> nvgpu_ce_debugfs_init - gk20a_get_valid_launch_flags -> nvgpu_ce_get_valid_launch_flags - gk20a_ce2_isr -> gk20a_ce2_stall_isr - gp10b_ce_isr -> gp10b_ce_stall_isr - gv11b_ce_isr -> gv11b_ce_stall_isr Inlined - ce*_nonblockpipe_isr - ce*_blockpipe_isr - ce*_launcherr_isr Added ce_priv.h for ce private definitions. 
Moved files to common/ce and hal/ce - ce2.c -> common/ce/ce.c - ce2_gk20a.c -> hal/ce/ce2_gk20a.c - ce2_gk20a.h -> hal/ce/ce2_gk20a.h - ce_gp10b.c -> hal/ce/ce_gp10b.c - ce_gp10b.h -> hal/ce/ce_gp10b.h - ce_gv11b.c -> hal/ce/ce_gv11b.c - ce_gv11b.h -> hal/ce/ce_gv11b.h Updated makefiles and #include directives Jira NVGPU-1992 Change-Id: Ia6064bf51b7a254085be43a112d056cb6fb6c3b2 Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2093503 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 8 +- drivers/gpu/nvgpu/Makefile.sources | 8 +- .../{gk20a/ce2_gk20a.c => common/ce/ce.c} | 276 +++++++++++------- drivers/gpu/nvgpu/common/ce/ce_priv.h | 90 ++++++ drivers/gpu/nvgpu/common/ce2.c | 164 ----------- drivers/gpu/nvgpu/common/fifo/engines.c | 4 +- drivers/gpu/nvgpu/common/init/nvgpu_init.c | 9 +- drivers/gpu/nvgpu/common/mm/mm.c | 7 +- drivers/gpu/nvgpu/common/mm/vidmem.c | 8 +- .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 5 +- .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 6 +- drivers/gpu/nvgpu/hal/ce/ce2_gk20a.c | 76 +++++ drivers/gpu/nvgpu/hal/ce/ce2_gk20a.h | 32 ++ .../gpu/nvgpu/{gp10b => hal/ce}/ce_gp10b.c | 39 +-- .../gpu/nvgpu/{gp10b => hal/ce}/ce_gp10b.h | 4 +- .../gpu/nvgpu/{gv11b => hal/ce}/ce_gv11b.c | 47 +-- .../gpu/nvgpu/{gv11b => hal/ce}/ce_gv11b.h | 4 +- drivers/gpu/nvgpu/hal/fb/fb_gv11b.c | 4 +- drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c | 2 +- drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 9 +- drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 8 +- drivers/gpu/nvgpu/hal/init/hal_gv100.c | 10 +- drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 10 +- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 9 +- drivers/gpu/nvgpu/hal/mc/mc_gm20b.c | 8 +- drivers/gpu/nvgpu/hal/mc/mc_gp10b.c | 4 +- .../{gk20a/ce2_gk20a.h => include/nvgpu/ce.h} | 86 +----- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 6 +- drivers/gpu/nvgpu/os/linux/debug.c | 2 +- drivers/gpu/nvgpu/os/linux/debug_ce.c | 8 +- drivers/gpu/nvgpu/os/linux/debug_ce.h | 4 
+- 31 files changed, 488 insertions(+), 469 deletions(-) rename drivers/gpu/nvgpu/{gk20a/ce2_gk20a.c => common/ce/ce.c} (70%) create mode 100644 drivers/gpu/nvgpu/common/ce/ce_priv.h delete mode 100644 drivers/gpu/nvgpu/common/ce2.c create mode 100644 drivers/gpu/nvgpu/hal/ce/ce2_gk20a.c create mode 100644 drivers/gpu/nvgpu/hal/ce/ce2_gk20a.h rename drivers/gpu/nvgpu/{gp10b => hal/ce}/ce_gp10b.c (69%) rename drivers/gpu/nvgpu/{gp10b => hal/ce}/ce_gp10b.h (90%) rename drivers/gpu/nvgpu/{gv11b => hal/ce}/ce_gv11b.c (76%) rename drivers/gpu/nvgpu/{gv11b => hal/ce}/ce_gv11b.h (91%) rename drivers/gpu/nvgpu/{gk20a/ce2_gk20a.h => include/nvgpu/ce.h} (61%) diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index cc57a4548..4457abb1f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -216,6 +216,9 @@ nvgpu-y += \ hal/regops/regops_gv100.o \ hal/regops/regops_gv11b.o \ hal/regops/regops_tu104.o \ + hal/ce/ce2_gk20a.o \ + hal/ce/ce_gp10b.o \ + hal/ce/ce_gv11b.o \ hal/therm/therm_gm20b.o \ hal/therm/therm_gp10b.o \ hal/therm/therm_gp106.o \ @@ -498,9 +501,8 @@ nvgpu-y += \ common/fifo/userd.o \ common/fence/fence.o \ common/ecc.o \ - common/ce2.o \ + common/ce/ce.o \ common/debugger.o \ - gk20a/ce2_gk20a.o \ gk20a/fifo_gk20a.o \ gk20a/gr_gk20a.o \ gk20a/mm_gk20a.o \ @@ -551,13 +553,11 @@ nvgpu-$(CONFIG_GK20A_CYCLE_STATS) += \ nvgpu-y += \ gp10b/gr_gp10b.o \ - gp10b/ce_gp10b.o \ gp10b/mm_gp10b.o \ gp106/bios_gp106.o \ gv11b/gr_gv11b.o \ gv11b/fifo_gv11b.o \ gv11b/mm_gv11b.o \ - gv11b/ce_gv11b.o \ gv100/mm_gv100.o \ gv100/bios_gv100.o \ gv100/fifo_gv100.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 4e34c48b4..d162bea47 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -92,7 +92,7 @@ srcs += common/sim.c \ common/cbc/cbc.c \ common/io/io.c \ common/ecc.c \ - common/ce2.c \ + common/ce/ce.c \ common/vbios/bios.c \ common/falcon/falcon.c \ 
common/falcon/falcon_sw_gk20a.c \ @@ -239,7 +239,6 @@ srcs += common/sim.c \ common/nvlink/nvlink_tu104.c \ common/nvlink/nvlink.c \ common/fence/fence.c \ - gk20a/ce2_gk20a.c \ gk20a/fifo_gk20a.c \ gk20a/gr_gk20a.c \ gk20a/mm_gk20a.c \ @@ -247,12 +246,10 @@ srcs += common/sim.c \ gm20b/clk_gm20b.c \ gm20b/mm_gm20b.c \ gp10b/gr_gp10b.c \ - gp10b/ce_gp10b.c \ gp10b/mm_gp10b.c \ gv11b/gr_gv11b.c \ gv11b/fifo_gv11b.c \ gv11b/mm_gv11b.c \ - gv11b/ce_gv11b.c \ gp106/bios_gp106.c \ gv100/mm_gv100.c \ gv100/bios_gv100.c \ @@ -334,6 +331,9 @@ srcs += common/sim.c \ hal/regops/regops_gv100.c \ hal/regops/regops_gv11b.c \ hal/regops/regops_tu104.c \ + hal/ce/ce2_gk20a.c \ + hal/ce/ce_gp10b.c \ + hal/ce/ce_gv11b.c \ hal/therm/therm_gm20b.c \ hal/therm/therm_gp10b.c \ hal/therm/therm_gv11b.c \ diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/common/ce/ce.c similarity index 70% rename from drivers/gpu/nvgpu/gk20a/ce2_gk20a.c rename to drivers/gpu/nvgpu/common/ce/ce.c index 1de7faf7d..ace0d5294 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/common/ce/ce.c @@ -1,7 +1,5 @@ /* - * GK20A Graphics Copy Engine (gr host) - * - * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,99 +20,164 @@ * DEALINGS IN THE SOFTWARE. 
*/ -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include -#include -#include +#include +#include #include -#include +#include +#include -#include "gk20a/ce2_gk20a.h" +#include "common/ce/ce_priv.h" -#include -#include -#include -#include - -/* - * Copy engine defines line size in pixels - */ -#define MAX_CE_SHIFT 31U /* 4Gpixels -1 */ -#define MAX_CE_MASK ((u32) (~(~0U << MAX_CE_SHIFT))) -#define MAX_CE_ALIGN(a) ((a) & MAX_CE_MASK) - - -static u32 ce2_nonblockpipe_isr(struct gk20a *g, u32 fifo_intr) +static inline u32 nvgpu_ce_get_valid_launch_flags(struct gk20a *g, + u32 launch_flags) { - nvgpu_log(g, gpu_dbg_intr, "ce2 non-blocking pipe interrupt\n"); - - return ce2_intr_status_nonblockpipe_pending_f(); + /* + * there is no local memory available, + * don't allow local memory related CE flags + */ + if (g->mm.vidmem.size == 0ULL) { + launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | + NVGPU_CE_DST_LOCATION_LOCAL_FB); + } + return launch_flags; } -static u32 ce2_blockpipe_isr(struct gk20a *g, u32 fifo_intr) +int nvgpu_ce_execute_ops(struct gk20a *g, + u32 ce_ctx_id, + u64 src_buf, + u64 dst_buf, + u64 size, + unsigned int payload, + u32 launch_flags, + u32 request_operation, + u32 submit_flags, + struct nvgpu_fence_type **fence_out) { - nvgpu_log(g, gpu_dbg_intr, "ce2 blocking pipe interrupt\n"); + int ret = -EPERM; + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; + bool found = false; + u32 *cmd_buf_cpu_va; + u64 cmd_buf_gpu_va = 0UL; + u32 method_size; + u32 cmd_buf_read_offset; + u32 dma_copy_class; + struct nvgpu_gpfifo_entry gpfifo; + struct nvgpu_channel_fence fence = {0U, 0U}; + struct nvgpu_fence_type *ce_cmd_buf_fence_out = NULL; - return ce2_intr_status_blockpipe_pending_f(); -} - -static u32 ce2_launcherr_isr(struct gk20a *g, u32 fifo_intr) -{ - nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt\n"); - - return ce2_intr_status_launcherr_pending_f(); -} 
- -void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base) -{ - u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); - u32 clear_intr = 0; - - nvgpu_log(g, gpu_dbg_intr, "ce2 isr %08x\n", ce2_intr); - - /* clear blocking interrupts: they exibit broken behavior */ - if ((ce2_intr & ce2_intr_status_blockpipe_pending_f()) != 0U) { - clear_intr |= ce2_blockpipe_isr(g, ce2_intr); + if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) { + goto end; } - if ((ce2_intr & ce2_intr_status_launcherr_pending_f()) != 0U) { - clear_intr |= ce2_launcherr_isr(g, ce2_intr); + nvgpu_mutex_acquire(&ce_app->app_mutex); + + nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { + if (ce_ctx->ctx_id == ce_ctx_id) { + found = true; + break; + } } - gk20a_writel(g, ce2_intr_status_r(), clear_intr); - return; -} + nvgpu_mutex_release(&ce_app->app_mutex); -u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) -{ - u32 ops = 0; - u32 ce2_intr = gk20a_readl(g, ce2_intr_status_r()); - - nvgpu_log(g, gpu_dbg_intr, "ce2 nonstall isr %08x\n", ce2_intr); - - if ((ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) != 0U) { - gk20a_writel(g, ce2_intr_status_r(), - ce2_nonblockpipe_isr(g, ce2_intr)); - ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE | - GK20A_NONSTALL_OPS_POST_EVENTS); + if (!found) { + ret = -EINVAL; + goto end; } - return ops; + + if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { + ret = -ENODEV; + goto end; + } + + nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); + + ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; + + cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * + (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / + U32(sizeof(u32)))); + + cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; + + if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] != NULL) { + struct nvgpu_fence_type **prev_post_fence = + &ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; 
+ + ret = nvgpu_fence_wait(g, *prev_post_fence, + nvgpu_get_poll_timeout(g)); + + nvgpu_fence_put(*prev_post_fence); + *prev_post_fence = NULL; + if (ret != 0) { + goto noop; + } + } + + cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + + (u64)(cmd_buf_read_offset * sizeof(u32))); + + dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); + method_size = nvgpu_ce_prepare_submit(src_buf, + dst_buf, + size, + &cmd_buf_cpu_va[cmd_buf_read_offset], + NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, + payload, + nvgpu_ce_get_valid_launch_flags(g, launch_flags), + request_operation, + dma_copy_class); + + if (method_size != 0U) { + /* store the element into gpfifo */ + g->ops.pbdma.format_gpfifo_entry(g, &gpfifo, + cmd_buf_gpu_va, method_size); + + /* + * take always the postfence as it is needed for protecting the + * ce context + */ + submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; + + nvgpu_smp_wmb(); + + ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, + 1, submit_flags, &fence, &ce_cmd_buf_fence_out); + + if (ret == 0) { + ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = + ce_cmd_buf_fence_out; + if (fence_out != NULL) { + nvgpu_fence_get(ce_cmd_buf_fence_out); + *fence_out = ce_cmd_buf_fence_out; + } + + /* Next available command buffer queue Index */ + ++ce_ctx->cmd_buf_read_queue_offset; + } + } else { + ret = -ENOMEM; + } +noop: + nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); +end: + return ret; } /* static CE app api */ -static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx) +static void nvgpu_ce_put_fences(struct nvgpu_ce_gpu_ctx *ce_ctx) { u32 i; - for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) { + for (i = 0U; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) { struct nvgpu_fence_type **fence = &ce_ctx->postfences[i]; + if (*fence != NULL) { nvgpu_fence_put(*fence); } @@ -122,8 +185,8 @@ static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx) } } -/* assume this api should need to call under nvgpu_mutex_acquire(&ce_app->app_mutex) 
*/ -static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) +/* caller must hold ce_app->app_mutex */ +static void nvgpu_ce_delete_gpu_context_locked(struct nvgpu_ce_gpu_ctx *ce_ctx) { struct nvgpu_list_node *list = &ce_ctx->list; @@ -133,7 +196,7 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) { - gk20a_ce_put_fences(ce_ctx); + nvgpu_ce_put_fences(ce_ctx); nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem); } @@ -155,7 +218,7 @@ static void gk20a_ce_delete_gpu_context(struct gk20a_gpu_ctx *ce_ctx) nvgpu_kfree(ce_ctx->g, ce_ctx); } -static inline unsigned int gk20a_ce_get_method_size(u32 request_operation, +static inline unsigned int nvgpu_ce_get_method_size(u32 request_operation, u64 size) { /* failure size */ @@ -188,7 +251,7 @@ static inline unsigned int gk20a_ce_get_method_size(u32 request_operation, return methodsize; } -u32 gk20a_ce_prepare_submit(u64 src_buf, +u32 nvgpu_ce_prepare_submit(u64 src_buf, u64 dst_buf, u64 size, u32 *cmd_buf_cpu_va, @@ -205,7 +268,7 @@ u32 gk20a_ce_prepare_submit(u64 src_buf, u64 chunk = size; /* failure case handling */ - if ((gk20a_ce_get_method_size(request_operation, size) > + if ((nvgpu_ce_get_method_size(request_operation, size) > max_cmd_buf_size) || (size == 0ULL) || (request_operation > NVGPU_CE_MEMSET)) { return 0; @@ -266,7 +329,7 @@ u32 gk20a_ce_prepare_submit(u64 src_buf, NVGPU_CE_SRC_LOCATION_LOCAL_FB) != 0U) { cmd_buf_cpu_va[methodSize++] = 0x00000000; } else if ((launch_flags & - NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) != 0U) { + NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) != 0U) { cmd_buf_cpu_va[methodSize++] = 0x00000002; } else { cmd_buf_cpu_va[methodSize++] = 0x00000001; @@ -336,9 +399,9 @@ u32 gk20a_ce_prepare_submit(u64 src_buf, } /* global CE app related apis */ -int gk20a_init_ce_support(struct gk20a *g) +int nvgpu_ce_init_support(struct gk20a *g) { - struct gk20a_ce_app 
*ce_app = g->ce_app; + struct nvgpu_ce_app *ce_app = g->ce_app; int err; u32 ce_reset_mask; @@ -385,10 +448,10 @@ int gk20a_init_ce_support(struct gk20a *g) return 0; } -void gk20a_ce_destroy(struct gk20a *g) +void nvgpu_ce_destroy(struct gk20a *g) { - struct gk20a_ce_app *ce_app = g->ce_app; - struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; if (ce_app == NULL) { return; @@ -404,8 +467,8 @@ void gk20a_ce_destroy(struct gk20a *g) nvgpu_mutex_acquire(&ce_app->app_mutex); nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, - &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { - gk20a_ce_delete_gpu_context(ce_ctx); + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { + nvgpu_ce_delete_gpu_context_locked(ce_ctx); } nvgpu_init_list_node(&ce_app->allocated_contexts); @@ -420,27 +483,25 @@ free: g->ce_app = NULL; } -void gk20a_ce_suspend(struct gk20a *g) +void nvgpu_ce_suspend(struct gk20a *g) { - struct gk20a_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_app *ce_app = g->ce_app; if (ce_app == NULL || !ce_app->initialised) { return; } ce_app->app_state = NVGPU_CE_SUSPEND; - - return; } /* CE app utility functions */ -u32 gk20a_ce_create_context(struct gk20a *g, +u32 nvgpu_ce_create_context(struct gk20a *g, u32 runlist_id, int timeslice, int runlist_level) { - struct gk20a_gpu_ctx *ce_ctx; - struct gk20a_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx; + struct nvgpu_ce_app *ce_app = g->ce_app; struct nvgpu_setup_bind_args setup_bind_args; u32 ctx_id = NVGPU_CE_INVAL_CTX_ID; int err = 0; @@ -516,7 +577,7 @@ u32 gk20a_ce_create_context(struct gk20a *g, &ce_ctx->cmd_buf_mem); if (err != 0) { nvgpu_err(g, - "ce: could not allocate command buffer for CE context"); + "ce: alloc command buffer failed"); goto end; } @@ -527,8 +588,7 @@ u32 gk20a_ce_create_context(struct gk20a *g, if (timeslice != -1) { err = gk20a_fifo_tsg_set_timeslice(ce_ctx->tsg, timeslice); if (err != 0) { 
- nvgpu_err(g, - "ce: could not set the channel timeslice value for CE context"); + nvgpu_err(g, "ce: set timesliced failed for CE context"); goto end; } } @@ -538,8 +598,7 @@ u32 gk20a_ce_create_context(struct gk20a *g, err = gk20a_tsg_set_runlist_interleave(ce_ctx->tsg, runlist_level); if (err != 0) { - nvgpu_err(g, - "ce: could not set the runlist interleave for CE context"); + nvgpu_err(g, "ce: set runlist interleave failed"); goto end; } } @@ -556,24 +615,18 @@ u32 gk20a_ce_create_context(struct gk20a *g, end: if (ctx_id == NVGPU_CE_INVAL_CTX_ID) { nvgpu_mutex_acquire(&ce_app->app_mutex); - gk20a_ce_delete_gpu_context(ce_ctx); + nvgpu_ce_delete_gpu_context_locked(ce_ctx); nvgpu_mutex_release(&ce_app->app_mutex); } return ctx_id; } -void gk20a_ce_delete_context(struct gk20a *g, +void nvgpu_ce_delete_context(struct gk20a *g, u32 ce_ctx_id) { - gk20a_ce_delete_context_priv(g, ce_ctx_id); -} - -void gk20a_ce_delete_context_priv(struct gk20a *g, - u32 ce_ctx_id) -{ - struct gk20a_ce_app *ce_app = g->ce_app; - struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; + struct nvgpu_ce_app *ce_app = g->ce_app; + struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save; if (ce_app == NULL || !ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) { @@ -583,14 +636,13 @@ void gk20a_ce_delete_context_priv(struct gk20a *g, nvgpu_mutex_acquire(&ce_app->app_mutex); nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, - &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { + &ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) { if (ce_ctx->ctx_id == ce_ctx_id) { - gk20a_ce_delete_gpu_context(ce_ctx); + nvgpu_ce_delete_gpu_context_locked(ce_ctx); --ce_app->ctx_count; break; } } nvgpu_mutex_release(&ce_app->app_mutex); - return; } diff --git a/drivers/gpu/nvgpu/common/ce/ce_priv.h b/drivers/gpu/nvgpu/common/ce/ce_priv.h new file mode 100644 index 000000000..be1d87430 --- /dev/null +++ b/drivers/gpu/nvgpu/common/ce/ce_priv.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_CE_PRIV_H +#define NVGPU_CE_PRIV_H + +#include +#include +#include +#include + +/* + * Copy engine defines line size in pixels + */ +#define MAX_CE_SHIFT 31U /* 4Gpixels -1 */ +#define MAX_CE_MASK ((u32) (~(~0U << MAX_CE_SHIFT))) +#define MAX_CE_ALIGN(a) ((a) & MAX_CE_MASK) + +struct gk20a; + +/* ce context db */ +struct nvgpu_ce_gpu_ctx { + struct gk20a *g; + u32 ctx_id; + struct nvgpu_mutex gpu_ctx_mutex; + int gpu_ctx_state; + + /* tsg related data */ + struct tsg_gk20a *tsg; + + /* channel related data */ + struct channel_gk20a *ch; + struct vm_gk20a *vm; + + /* cmd buf mem_desc */ + struct nvgpu_mem cmd_buf_mem; + struct nvgpu_fence_type *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS]; + + struct nvgpu_list_node list; + + u32 cmd_buf_read_queue_offset; +}; + +/* global ce app db */ +struct nvgpu_ce_app { + bool initialised; + struct nvgpu_mutex app_mutex; + int app_state; + + struct nvgpu_list_node allocated_contexts; + u32 ctx_count; + u32 next_ctx_id; +}; + +static inline struct nvgpu_ce_gpu_ctx * +nvgpu_ce_gpu_ctx_from_list(struct nvgpu_list_node *node) +{ + return (struct nvgpu_ce_gpu_ctx *) + ((uintptr_t)node - offsetof(struct nvgpu_ce_gpu_ctx, list)); +}; + +u32 nvgpu_ce_prepare_submit(u64 src_buf, + u64 dst_buf, + u64 size, + u32 *cmd_buf_cpu_va, + u32 max_cmd_buf_size, + unsigned int payload, + u32 launch_flags, + u32 request_operation, + u32 dma_copy_class); + +#endif /*NVGPU_CE_PRIV_H*/ diff --git a/drivers/gpu/nvgpu/common/ce2.c b/drivers/gpu/nvgpu/common/ce2.c deleted file mode 100644 index 8245fd931..000000000 --- a/drivers/gpu/nvgpu/common/ce2.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "gk20a/ce2_gk20a.h" - -static inline u32 gk20a_get_valid_launch_flags(struct gk20a *g, u32 launch_flags) -{ - /* there is no local memory available, - don't allow local memory related CE flags */ - if (g->mm.vidmem.size == 0ULL) { - launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB | - NVGPU_CE_DST_LOCATION_LOCAL_FB); - } - return launch_flags; -} - -int gk20a_ce_execute_ops(struct gk20a *g, - u32 ce_ctx_id, - u64 src_buf, - u64 dst_buf, - u64 size, - unsigned int payload, - u32 launch_flags, - u32 request_operation, - u32 submit_flags, - struct nvgpu_fence_type **fence_out) -{ - int ret = -EPERM; - struct gk20a_ce_app *ce_app = g->ce_app; - struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save; - bool found = false; - u32 *cmd_buf_cpu_va; - u64 cmd_buf_gpu_va = 0; - u32 method_size; - u32 cmd_buf_read_offset; - u32 dma_copy_class; - struct nvgpu_gpfifo_entry gpfifo; - struct nvgpu_channel_fence fence = {0, 0}; - struct nvgpu_fence_type *ce_cmd_buf_fence_out = NULL; - - if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) { - goto end; - } - - nvgpu_mutex_acquire(&ce_app->app_mutex); - - nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save, - &ce_app->allocated_contexts, gk20a_gpu_ctx, list) { - if (ce_ctx->ctx_id == ce_ctx_id) { - found = true; - break; - } - } - - nvgpu_mutex_release(&ce_app->app_mutex); - - if (!found) { - ret = -EINVAL; - goto end; - } - - if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) { - ret = -ENODEV; - goto end; - } - - nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex); - - ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS; - - cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset * - (NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / - U32(sizeof(u32)))); - - cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va; - - if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] != NULL) { - struct nvgpu_fence_type **prev_post_fence = - 
&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]; - - ret = nvgpu_fence_wait(g, *prev_post_fence, - nvgpu_get_poll_timeout(g)); - - nvgpu_fence_put(*prev_post_fence); - *prev_post_fence = NULL; - if (ret != 0) { - goto noop; - } - } - - cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + - (u64)(cmd_buf_read_offset *sizeof(u32))); - - dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS); - method_size = gk20a_ce_prepare_submit(src_buf, - dst_buf, - size, - &cmd_buf_cpu_va[cmd_buf_read_offset], - NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF, - payload, - gk20a_get_valid_launch_flags(g, launch_flags), - request_operation, - dma_copy_class); - - if (method_size != 0U) { - /* store the element into gpfifo */ - g->ops.pbdma.format_gpfifo_entry(g, &gpfifo, - cmd_buf_gpu_va, method_size); - - /* - * take always the postfence as it is needed for protecting the - * ce context - */ - submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET; - - nvgpu_smp_wmb(); - - ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo, - 1, submit_flags, &fence, &ce_cmd_buf_fence_out); - - if (ret == 0) { - ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] = - ce_cmd_buf_fence_out; - if (fence_out != NULL) { - nvgpu_fence_get(ce_cmd_buf_fence_out); - *fence_out = ce_cmd_buf_fence_out; - } - - /* Next available command buffer queue Index */ - ++ce_ctx->cmd_buf_read_queue_offset; - } - } else { - ret = -ENOMEM; - } -noop: - nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex); -end: - return ret; -} diff --git a/drivers/gpu/nvgpu/common/fifo/engines.c b/drivers/gpu/nvgpu/common/fifo/engines.c index d67f00189..03609604f 100644 --- a/drivers/gpu/nvgpu/common/fifo/engines.c +++ b/drivers/gpu/nvgpu/common/fifo/engines.c @@ -204,8 +204,8 @@ u32 nvgpu_engine_interrupt_mask(struct gk20a *g) engine_enum = g->fifo.engine_info[active_engine_id].engine_enum; if (((engine_enum == NVGPU_ENGINE_GRCE_GK20A) || (engine_enum == NVGPU_ENGINE_ASYNC_CE_GK20A)) && - ((g->ops.ce2.isr_stall == NULL) || - 
(g->ops.ce2.isr_nonstall == NULL))) { + ((g->ops.ce.isr_stall == NULL) || + (g->ops.ce.isr_nonstall == NULL))) { continue; } diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 483636535..93e52d734 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -47,8 +48,6 @@ #include -#include "gk20a/ce2_gk20a.h" - bool is_nvgpu_gpu_state_valid(struct gk20a *g) { u32 boot_0 = 0xffffffffU; @@ -129,7 +128,7 @@ int gk20a_prepare_poweroff(struct gk20a *g) nvgpu_falcon_sw_free(g, FALCON_ID_SEC2); nvgpu_falcon_sw_free(g, FALCON_ID_PMU); - gk20a_ce_suspend(g); + nvgpu_ce_suspend(g); /* Disable GPCPLL */ if (g->ops.clk.suspend_clk_support != NULL) { @@ -423,7 +422,7 @@ int gk20a_finalize_poweron(struct gk20a *g) /* Restore the debug setting */ g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl); - gk20a_init_ce_support(g); + nvgpu_ce_init_support(g); if (g->ops.xve.available_speeds != NULL) { u32 speed; @@ -579,7 +578,7 @@ static void gk20a_free_cb(struct nvgpu_ref *refcount) nvgpu_log(g, gpu_dbg_shutdown, "Freeing GK20A struct!"); - gk20a_ce_destroy(g); + nvgpu_ce_destroy(g); nvgpu_cbc_remove_support(g); diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index 47990a03f..5da4d9677 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -30,11 +30,10 @@ #include #include #include +#include #include #include -#include "gk20a/ce2_gk20a.h" - /* * Attempt to find a reserved memory area to determine PTE size for the passed * mapping. If no reserved area can be found use small pages. 
@@ -179,7 +178,7 @@ static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); if (mm->vidmem.ce_ctx_id != NVGPU_CE_INVAL_CTX_ID) { - gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id); + nvgpu_ce_delete_context(g, mm->vidmem.ce_ctx_id); } mm->vidmem.ce_ctx_id = NVGPU_CE_INVAL_CTX_ID; @@ -362,7 +361,7 @@ void nvgpu_init_mm_ce_context(struct gk20a *g) if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID)) { g->mm.vidmem.ce_ctx_id = - gk20a_ce_create_context(g, + nvgpu_ce_create_context(g, nvgpu_engine_get_fast_ce_runlist_id(g), -1, -1); diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c index 01d3913a9..144275fcc 100644 --- a/drivers/gpu/nvgpu/common/mm/vidmem.c +++ b/drivers/gpu/nvgpu/common/mm/vidmem.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include "gk20a/mm_gk20a.h" -#include "gk20a/ce2_gk20a.h" /* * This is expected to be called from the shutdown path (or the error path in @@ -104,7 +104,7 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g) vidmem_dbg(g, "Clearing all VIDMEM:"); - err = gk20a_ce_execute_ops(g, + err = nvgpu_ce_execute_ops(g, mm->vidmem.ce_ctx_id, 0, mm->vidmem.base, @@ -472,7 +472,7 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) nvgpu_fence_put(last_fence); } - err = gk20a_ce_execute_ops(g, + err = nvgpu_ce_execute_ops(g, g->mm.vidmem.ce_ctx_id, 0, nvgpu_sgt_get_phys(g, &alloc->sgt, sgl), @@ -485,7 +485,7 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem) if (err != 0) { nvgpu_err(g, - "Failed gk20a_ce_execute_ops[%d]", err); + "Failed nvgpu_ce_execute_ops[%d]", err); return err; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index f526d706b..80bf1be9a 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ 
-85,7 +85,6 @@ #include "common/sync/sema_cmdbuf_gk20a.h" #include "gp10b/mm_gp10b.h" -#include "gp10b/ce_gp10b.h" #include "gp10b/gr_gp10b.h" #include "gm20b/gr_gm20b.h" @@ -119,7 +118,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .ctrl = NULL, .fix_config = NULL, }, - .ce2 = { + .ce = { .isr_stall = NULL, .isr_nonstall = NULL, .get_num_pce = vgpu_ce_get_num_pce, @@ -764,7 +763,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) gops->ltc = vgpu_gp10b_ops.ltc; gops->cbc = vgpu_gp10b_ops.cbc; - gops->ce2 = vgpu_gp10b_ops.ce2; + gops->ce = vgpu_gp10b_ops.ce; gops->gr = vgpu_gp10b_ops.gr; gops->class = vgpu_gp10b_ops.class; gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 21bb15d4e..9715c0995 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -94,11 +94,9 @@ #include #include -#include #include "gp10b/gr_gp10b.h" #include -#include #include #include @@ -144,7 +142,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .ctrl = NULL, .alloc_comptags = vgpu_cbc_alloc_comptags, }, - .ce2 = { + .ce = { .isr_stall = NULL, .isr_nonstall = NULL, .get_num_pce = vgpu_ce_get_num_pce, @@ -858,7 +856,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) gops->ltc = vgpu_gv11b_ops.ltc; gops->cbc = vgpu_gv11b_ops.cbc; - gops->ce2 = vgpu_gv11b_ops.ce2; + gops->ce = vgpu_gv11b_ops.ce; gops->gr = vgpu_gv11b_ops.gr; gops->class = vgpu_gv11b_ops.class; gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.c b/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.c new file mode 100644 index 000000000..77c65a9c6 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.c @@ -0,0 +1,76 @@ +/* + * GK20A Graphics Copy Engine (gr host) + * + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ce2_gk20a.h" + +#include + +void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) +{ + u32 ce2_intr = nvgpu_readl(g, ce2_intr_status_r()); + u32 clear_intr = 0U; + + nvgpu_log(g, gpu_dbg_intr, "ce2 isr %08x", ce2_intr); + + /* clear blocking interrupts: they exhibit broken behavior */ + if ((ce2_intr & ce2_intr_status_blockpipe_pending_f()) != 0U) { + nvgpu_log(g, gpu_dbg_intr, "ce2 blocking pipe interrupt"); + clear_intr |= ce2_intr_status_blockpipe_pending_f(); + } + + if ((ce2_intr & ce2_intr_status_launcherr_pending_f()) != 0U) { + nvgpu_log(g, gpu_dbg_intr, "ce2 launch error interrupt"); + clear_intr |= ce2_intr_status_launcherr_pending_f(); + } + + nvgpu_writel(g, ce2_intr_status_r(), clear_intr); +} + +u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) +{ + u32 ops = 0U; + u32 ce2_intr = nvgpu_readl(g, ce2_intr_status_r()); + + nvgpu_log(g, gpu_dbg_intr, "ce2 nonstall isr %08x", ce2_intr); + + if ((ce2_intr & ce2_intr_status_nonblockpipe_pending_f()) != 0U) { + nvgpu_log(g, gpu_dbg_intr, "ce2 non-blocking pipe interrupt"); + nvgpu_writel(g, ce2_intr_status_r(), + ce2_intr_status_nonblockpipe_pending_f()); + ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE | + GK20A_NONSTALL_OPS_POST_EVENTS); + } + return ops; +} diff --git a/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.h b/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.h new file mode 100644 index 000000000..653b97cdf --- /dev/null +++ b/drivers/gpu/nvgpu/hal/ce/ce2_gk20a.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef NVGPU_CE2_GK20A_H +#define NVGPU_CE2_GK20A_H + +#include + +struct gk20a; + +void gk20a_ce2_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); + +#endif /*NVGPU_CE2_GK20A_H*/ diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c similarity index 69% rename from drivers/gpu/nvgpu/gp10b/ce_gp10b.c rename to drivers/gpu/nvgpu/hal/ce/ce_gp10b.c index d2993e487..0060dffc5 100644 --- a/drivers/gpu/nvgpu/gp10b/ce_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c @@ -30,55 +30,44 @@ #include -static u32 ce_blockpipe_isr(struct gk20a *g, u32 fifo_intr) +void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { - nvgpu_log(g, gpu_dbg_intr, "ce blocking pipe interrupt\n"); + u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); + u32 clear_intr = 0U; - return ce_intr_status_blockpipe_pending_f(); -} - -static u32 ce_launcherr_isr(struct gk20a *g, u32 fifo_intr) -{ - nvgpu_log(g, gpu_dbg_intr, "ce launch error interrupt\n"); - - return ce_intr_status_launcherr_pending_f(); -} - -void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base) -{ - u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id)); - u32 clear_intr = 0; - - nvgpu_log(g, gpu_dbg_intr, "ce isr %08x %08x\n", ce_intr, inst_id); + nvgpu_log(g, gpu_dbg_intr, "ce isr %08x %08x", ce_intr, inst_id); /* clear blocking interrupts: they exibit broken behavior */ if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) { nvgpu_report_ce_error(g, inst_id, GPU_CE_BLOCK_PIPE, ce_intr); - clear_intr |= ce_blockpipe_isr(g, ce_intr); + nvgpu_log(g, gpu_dbg_intr, "ce blocking pipe interrupt"); + clear_intr |= ce_intr_status_blockpipe_pending_f(); } if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) { nvgpu_report_ce_error(g, inst_id, GPU_CE_LAUNCH_ERROR, ce_intr); - clear_intr |= ce_launcherr_isr(g, ce_intr); + nvgpu_log(g, gpu_dbg_intr, "ce launch error interrupt"); + clear_intr 
|= ce_intr_status_launcherr_pending_f(); } - gk20a_writel(g, ce_intr_status_r(inst_id), clear_intr); + nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr); return; } u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { - u32 ops = 0; - u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id)); + u32 ops = 0U; + u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); - nvgpu_log(g, gpu_dbg_intr, "ce nonstall isr %08x %08x\n", ce_intr, inst_id); + nvgpu_log(g, gpu_dbg_intr, "ce nonstall isr %08x %08x", + ce_intr, inst_id); if ((ce_intr & ce_intr_status_nonblockpipe_pending_f()) != 0U) { nvgpu_report_ce_error(g, inst_id, GPU_CE_NONBLOCK_PIPE, ce_intr); - gk20a_writel(g, ce_intr_status_r(inst_id), + nvgpu_writel(g, ce_intr_status_r(inst_id), ce_intr_status_nonblockpipe_pending_f()); ops |= (GK20A_NONSTALL_OPS_WAKEUP_SEMAPHORE | GK20A_NONSTALL_OPS_POST_EVENTS); diff --git a/drivers/gpu/nvgpu/gp10b/ce_gp10b.h b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.h similarity index 90% rename from drivers/gpu/nvgpu/gp10b/ce_gp10b.h rename to drivers/gpu/nvgpu/hal/ce/ce_gp10b.h index 4fa27d1e5..54ef19f1d 100644 --- a/drivers/gpu/nvgpu/gp10b/ce_gp10b.h +++ b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.h @@ -1,7 +1,7 @@ /* * Pascal GPU series Copy Engine. * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,7 +28,7 @@ struct gk20a; -void gp10b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); #endif /* NVGPU_CE_GP10B_H */ diff --git a/drivers/gpu/nvgpu/gv11b/ce_gv11b.c b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c similarity index 76% rename from drivers/gpu/nvgpu/gv11b/ce_gv11b.c rename to drivers/gpu/nvgpu/hal/ce/ce_gv11b.c index c85187f7d..925606ce1 100644 --- a/drivers/gpu/nvgpu/gv11b/ce_gv11b.c +++ b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c @@ -28,9 +28,8 @@ #include #include -#include "gp10b/ce_gp10b.h" - -#include "ce_gv11b.h" +#include "hal/ce/ce_gp10b.h" +#include "hal/ce/ce_gv11b.h" #include #include @@ -40,39 +39,41 @@ void nvgpu_report_ce_error(struct gk20a *g, u32 inst, { int ret = 0; - if (g->ops.ce2.err_ops.report_ce_err == NULL) { + if (g->ops.ce.err_ops.report_ce_err == NULL) { return; } - ret = g->ops.ce2.err_ops.report_ce_err(g, + ret = g->ops.ce.err_ops.report_ce_err(g, NVGPU_ERR_MODULE_CE, inst, err_type, status); if (ret != 0) { - nvgpu_err(g, "Failed to report CE error: " - "inst=%u, err_type=%u, status=%u", - inst, err_type, status); + nvgpu_err(g, + "report_ce_err failed inst=%u err_type=%u status=%u", + inst, err_type, status); } } u32 gv11b_ce_get_num_pce(struct gk20a *g) { - /* register contains a bitmask indicating which physical copy + /* + * register contains a bitmask indicating which physical copy * engines are present (and not floorswept). 
*/ u32 num_pce; - u32 ce_pce_map = gk20a_readl(g, ce_pce_map_r()); + u32 ce_pce_map = nvgpu_readl(g, ce_pce_map_r()); num_pce = U32(hweight32(ce_pce_map)); nvgpu_log_info(g, "num PCE: %d", num_pce); return num_pce; } -void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base) +void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) { - u32 ce_intr = gk20a_readl(g, ce_intr_status_r(inst_id)); - u32 clear_intr = 0; + u32 ce_intr = nvgpu_readl(g, ce_intr_status_r(inst_id)); + u32 clear_intr = 0U; nvgpu_log(g, gpu_dbg_intr, "ce isr 0x%08x 0x%08x", ce_intr, inst_id); - /* An INVALID_CONFIG interrupt will be generated if a floorswept + /* + * An INVALID_CONFIG interrupt will be generated if a floorswept * PCE is assigned to a valid LCE in the NV_CE_PCE2LCE_CONFIG * registers. This is a fatal error and the LCE will have to be * reset to get back to a working state. @@ -85,7 +86,8 @@ void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base) clear_intr |= ce_intr_status_invalid_config_reset_f(); } - /* A MTHD_BUFFER_FAULT interrupt will be triggered if any access + /* + * A MTHD_BUFFER_FAULT interrupt will be triggered if any access * to a method buffer during context load or save encounters a fault. * This is a fatal interrupt and will require at least the LCE to be * reset before operations can start again, if not the entire GPU. 
@@ -98,16 +100,16 @@ void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base) clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f(); } - gk20a_writel(g, ce_intr_status_r(inst_id), clear_intr); + nvgpu_writel(g, ce_intr_status_r(inst_id), clear_intr); - gp10b_ce_isr(g, inst_id, pri_base); + gp10b_ce_stall_isr(g, inst_id, pri_base); } u32 gv11b_ce_get_num_lce(struct gk20a *g) { u32 reg_val, num_lce; - reg_val = gk20a_readl(g, top_num_ces_r()); + reg_val = nvgpu_readl(g, top_num_ces_r()); num_lce = top_num_ces_value_v(reg_val); nvgpu_log_info(g, "num LCE: %d", num_lce); @@ -120,13 +122,14 @@ void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g) num_lce = gv11b_ce_get_num_lce(g); - for (lce = 0; lce < num_lce; lce++) { - reg_val = gk20a_readl(g, ce_intr_status_r(lce)); - if ((reg_val & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) { + for (lce = 0U; lce < num_lce; lce++) { + reg_val = nvgpu_readl(g, ce_intr_status_r(lce)); + if ((reg_val & + ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) { nvgpu_log(g, gpu_dbg_intr, "ce: lce %d: mthd buffer fault", lce); clear_intr = ce_intr_status_mthd_buffer_fault_reset_f(); - gk20a_writel(g, ce_intr_status_r(lce), clear_intr); + nvgpu_writel(g, ce_intr_status_r(lce), clear_intr); } } } diff --git a/drivers/gpu/nvgpu/gv11b/ce_gv11b.h b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.h similarity index 91% rename from drivers/gpu/nvgpu/gv11b/ce_gv11b.h rename to drivers/gpu/nvgpu/hal/ce/ce_gv11b.h index 43a891e6f..f2a981dbd 100644 --- a/drivers/gpu/nvgpu/gv11b/ce_gv11b.h +++ b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.h @@ -2,7 +2,7 @@ * * Volta GPU series copy engine * - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -30,6 +30,6 @@ struct gk20a; void gv11b_ce_mthd_buffer_fault_in_bar2_fault(struct gk20a *g); u32 gv11b_ce_get_num_lce(struct gk20a *g); u32 gv11b_ce_get_num_pce(struct gk20a *g); -void gv11b_ce_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); #endif /* NVGPU_CE_GV11B_H */ diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c index ae4af9aa4..384b01208 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c @@ -43,7 +43,9 @@ #include "gk20a/mm_gk20a.h" #include "gv11b/fifo_gv11b.h" -#include "gv11b/ce_gv11b.h" + +/* TODO: add hals for gv11b_ce_get_num_lce and gv11b_ce_mthd_buffer_fault_in_bar2_fault */ +#include "hal/ce/ce_gv11b.h" #include "fb_gm20b.h" #include "fb_gp10b.h" diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c index edf8e045c..64c247278 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gv11b.c @@ -105,7 +105,7 @@ static unsigned int gv11b_tsg_get_eng_method_buffer_size(struct gk20a *g) { unsigned int buffer_size; - buffer_size = ((9U + 1U + 3U) * g->ops.ce2.get_num_pce(g)) + 2U; + buffer_size = ((9U + 1U + 3U) * g->ops.ce.get_num_pce(g)) + 2U; buffer_size = (27U * 5U * buffer_size); buffer_size = roundup(buffer_size, PAGE_SIZE); nvgpu_log_info(g, "method buffer size in bytes %d", buffer_size); diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 27be7423b..e99781ccf 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -44,6 +44,7 @@ #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" +#include "hal/ce/ce2_gk20a.h" #include "hal/class/class_gm20b.h" 
#include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/power_features/cg/gm20b_gating_reglist.h" @@ -90,8 +91,6 @@ #include "common/fifo/channel_gm20b.h" #include "common/pmu/pg/pg_sw_gm20b.h" -#include "gk20a/ce2_gk20a.h" - #include "gm20b/gr_gm20b.h" #include "gm20b/mm_gm20b.h" #include "gm20b/clk_gm20b.h" @@ -240,8 +239,8 @@ static const struct gpu_ops gm20b_ops = { .alloc_comptags = gm20b_cbc_alloc_comptags, .fix_config = gm20b_cbc_fix_config, }, - .ce2 = { - .isr_stall = gk20a_ce2_isr, + .ce = { + .isr_stall = gk20a_ce2_stall_isr, .isr_nonstall = gk20a_ce2_nonstall_isr, }, .gr = { @@ -1049,7 +1048,7 @@ int gm20b_init_hal(struct gk20a *g) gops->ltc = gm20b_ops.ltc; gops->cbc = gm20b_ops.cbc; - gops->ce2 = gm20b_ops.ce2; + gops->ce = gm20b_ops.ce; gops->gr = gm20b_ops.gr; gops->class = gm20b_ops.class; gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 1ab1b585e..66e39c23e 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -47,6 +47,7 @@ #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gp10b.h" +#include "hal/ce/ce_gp10b.h" #include "hal/class/class_gp10b.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" @@ -117,7 +118,6 @@ #include "gp10b/gr_gp10b.h" #include "gp10b/mm_gp10b.h" -#include "gp10b/ce_gp10b.h" #include "gm20b/gr_gm20b.h" #include "gm20b/mm_gm20b.h" @@ -273,8 +273,8 @@ static const struct gpu_ops gp10b_ops = { .ctrl = gp10b_cbc_ctrl, .fix_config = gm20b_cbc_fix_config, }, - .ce2 = { - .isr_stall = gp10b_ce_isr, + .ce = { + .isr_stall = gp10b_ce_stall_isr, .isr_nonstall = gp10b_ce_nonstall_isr, }, .gr = { @@ -1151,7 +1151,7 @@ int gp10b_init_hal(struct gk20a *g) gops->ltc = gp10b_ops.ltc; gops->cbc = gp10b_ops.cbc; - gops->ce2 = gp10b_ops.ce2; + gops->ce = gp10b_ops.ce; gops->gr = gp10b_ops.gr; gops->class = gp10b_ops.class; 
gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c index d53142b45..4947e984a 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c @@ -31,6 +31,8 @@ #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gp10b.h" #include "hal/bus/bus_gv100.h" +#include "hal/ce/ce_gp10b.h" +#include "hal/ce/ce_gv11b.h" #include "hal/class/class_gv11b.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" @@ -134,11 +136,9 @@ #include "gp106/bios_gp106.h" #include "gp10b/gr_gp10b.h" -#include "gp10b/ce_gp10b.h" #include "gp10b/mm_gp10b.h" #include "gv11b/gr_gv11b.h" -#include "gv11b/ce_gv11b.h" #include "gv11b/mm_gv11b.h" #include "gv11b/fifo_gv11b.h" @@ -370,8 +370,8 @@ static const struct gpu_ops gv100_ops = { .ctrl = gp10b_cbc_ctrl, .fix_config = NULL, }, - .ce2 = { - .isr_stall = gv11b_ce_isr, + .ce = { + .isr_stall = gv11b_ce_stall_isr, .isr_nonstall = gp10b_ce_nonstall_isr, .get_num_pce = gv11b_ce_get_num_pce, }, @@ -1430,7 +1430,7 @@ int gv100_init_hal(struct gk20a *g) gops->bios = gv100_ops.bios; gops->ltc = gv100_ops.ltc; gops->cbc = gv100_ops.cbc; - gops->ce2 = gv100_ops.ce2; + gops->ce = gv100_ops.ce; gops->gr = gv100_ops.gr; gops->class = gv100_ops.class; gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 195cff1e0..7a7f68a62 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -38,6 +38,7 @@ #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gp10b.h" #include "hal/bus/bus_gm20b.h" +#include "hal/ce/ce_gv11b.h" #include "hal/class/class_gv11b.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" @@ -45,6 +46,7 @@ #include "hal/power_features/cg/gv11b_gating_reglist.h" #include "hal/cbc/cbc_gp10b.h" #include "hal/cbc/cbc_gv11b.h" 
+#include "hal/ce/ce_gp10b.h" #include "hal/therm/therm_gm20b.h" #include "hal/therm/therm_gv11b.h" #include "hal/ltc/ltc_gm20b.h" @@ -126,7 +128,6 @@ #include "gm20b/gr_gm20b.h" #include "gm20b/mm_gm20b.h" -#include "gp10b/ce_gp10b.h" #include "gp10b/mm_gp10b.h" #include "gp10b/gr_gp10b.h" @@ -134,7 +135,6 @@ #include "hal_gv11b.h" #include "gv11b/gr_gv11b.h" -#include "gv11b/ce_gv11b.h" #include "gv11b/mm_gv11b.h" #include "gv11b/fifo_gv11b.h" @@ -340,8 +340,8 @@ static const struct gpu_ops gv11b_ops = { .alloc_comptags = gp10b_cbc_alloc_comptags, .ctrl = gp10b_cbc_ctrl, }, - .ce2 = { - .isr_stall = gv11b_ce_isr, + .ce = { + .isr_stall = gv11b_ce_stall_isr, .isr_nonstall = gp10b_ce_nonstall_isr, .get_num_pce = gv11b_ce_get_num_pce, }, @@ -1330,7 +1330,7 @@ int gv11b_init_hal(struct gk20a *g) gops->ltc = gv11b_ops.ltc; gops->cbc = gv11b_ops.cbc; - gops->ce2 = gv11b_ops.ce2; + gops->ce = gv11b_ops.ce; gops->gr = gv11b_ops.gr; gops->class = gv11b_ops.class; gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 8e284c68e..8f7b46ae2 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -32,6 +32,8 @@ #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gv100.h" #include "hal/bus/bus_tu104.h" +#include "hal/ce/ce_gp10b.h" +#include "hal/ce/ce_gv11b.h" #include "hal/class/class_tu104.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" @@ -151,7 +153,6 @@ #include "gp10b/mm_gp10b.h" #include "gv11b/gr_gv11b.h" -#include "gv11b/ce_gv11b.h" #include "gv11b/mm_gv11b.h" #include "gv11b/fifo_gv11b.h" @@ -389,8 +390,8 @@ static const struct gpu_ops tu104_ops = { .ctrl = tu104_cbc_ctrl, .fix_config = NULL, }, - .ce2 = { - .isr_stall = gv11b_ce_isr, + .ce = { + .isr_stall = gv11b_ce_stall_isr, .isr_nonstall = NULL, .get_num_pce = gv11b_ce_get_num_pce, }, @@ -1475,7 +1476,7 @@ int tu104_init_hal(struct gk20a 
*g) gops->bios = tu104_ops.bios; gops->ltc = tu104_ops.ltc; gops->cbc = tu104_ops.cbc; - gops->ce2 = tu104_ops.ce2; + gops->ce = tu104_ops.ce; gops->gr = tu104_ops.gr; gops->class = tu104_ops.class; gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/mc/mc_gm20b.c b/drivers/gpu/nvgpu/hal/mc/mc_gm20b.c index 760b0a0f5..c16a20a88 100644 --- a/drivers/gpu/nvgpu/hal/mc/mc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/mc/mc_gm20b.c @@ -64,8 +64,8 @@ void gm20b_mc_isr_stall(struct gk20a *g) /* CE Engine */ if (((engine_enum == NVGPU_ENGINE_GRCE_GK20A) || (engine_enum == NVGPU_ENGINE_ASYNC_CE_GK20A)) && - (g->ops.ce2.isr_stall != NULL)) { - g->ops.ce2.isr_stall(g, + (g->ops.ce.isr_stall != NULL)) { + g->ops.ce.isr_stall(g, g->fifo.engine_info[act_eng_id].inst_id, g->fifo.engine_info[act_eng_id].pri_base); } @@ -116,8 +116,8 @@ u32 gm20b_mc_isr_nonstall(struct gk20a *g) /* CE Engine */ if (((engine_enum == NVGPU_ENGINE_GRCE_GK20A) || (engine_enum == NVGPU_ENGINE_ASYNC_CE_GK20A)) && - (g->ops.ce2.isr_nonstall != NULL)) { - ops |= g->ops.ce2.isr_nonstall(g, + (g->ops.ce.isr_nonstall != NULL)) { + ops |= g->ops.ce.isr_nonstall(g, engine_info->inst_id, engine_info->pri_base); } diff --git a/drivers/gpu/nvgpu/hal/mc/mc_gp10b.c b/drivers/gpu/nvgpu/hal/mc/mc_gp10b.c index 19b4d854d..57905b0ae 100644 --- a/drivers/gpu/nvgpu/hal/mc/mc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/mc/mc_gp10b.c @@ -117,8 +117,8 @@ void mc_gp10b_isr_stall(struct gk20a *g) /* CE Engine */ if (((engine_enum == NVGPU_ENGINE_GRCE_GK20A) || (engine_enum == NVGPU_ENGINE_ASYNC_CE_GK20A)) && - (g->ops.ce2.isr_stall != NULL)) { - g->ops.ce2.isr_stall(g, + (g->ops.ce.isr_stall != NULL)) { + g->ops.ce.isr_stall(g, g->fifo.engine_info[act_eng_id].inst_id, g->fifo.engine_info[act_eng_id].pri_base); } diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/ce.h similarity index 61% rename from drivers/gpu/nvgpu/gk20a/ce2_gk20a.h rename to drivers/gpu/nvgpu/include/nvgpu/ce.h 
index 32c8868f1..16262385f 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ce.h @@ -1,8 +1,4 @@ /* - * drivers/video/tegra/host/gk20a/fifo_gk20a.h - * - * GK20A graphics copy engine (gr host) - * * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -23,20 +19,18 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#ifndef NVGPU_GK20A_CE2_GK20A_H -#define NVGPU_GK20A_CE2_GK20A_H +#ifndef NVGPU_CE_H +#define NVGPU_CE_H + +#include -struct channel_gk20a; -struct tsg_gk20a; struct gk20a; - -void gk20a_ce2_isr(struct gk20a *g, u32 inst_id, u32 pri_base); -u32 gk20a_ce2_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base); +struct nvgpu_fence_type; #define NVGPU_CE_INVAL_CTX_ID ~U32(0U) /* CE command utility macros */ -#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK 0xffffffffU +#define NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK U32_MAX #define NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK 0xffU #define NVGPU_CE_MAX_INFLIGHT_JOBS 32U @@ -75,58 +69,19 @@ enum { NVGPU_CE_GPU_CTX_DELETED = (1 << 1), }; -/* global ce app db */ -struct gk20a_ce_app { - bool initialised; - struct nvgpu_mutex app_mutex; - int app_state; - - struct nvgpu_list_node allocated_contexts; - u32 ctx_count; - u32 next_ctx_id; -}; - -/* ce context db */ -struct gk20a_gpu_ctx { - struct gk20a *g; - u32 ctx_id; - struct nvgpu_mutex gpu_ctx_mutex; - int gpu_ctx_state; - - /* tsg related data */ - struct tsg_gk20a *tsg; - - /* channel related data */ - struct channel_gk20a *ch; - struct vm_gk20a *vm; - - /* cmd buf mem_desc */ - struct nvgpu_mem cmd_buf_mem; - struct nvgpu_fence_type *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS]; - - struct nvgpu_list_node list; - - u32 cmd_buf_read_queue_offset; -}; - -static inline struct gk20a_gpu_ctx * -gk20a_gpu_ctx_from_list(struct nvgpu_list_node *node) -{ - return (struct gk20a_gpu_ctx *) - ((uintptr_t)node - 
offsetof(struct gk20a_gpu_ctx, list)); -}; - /* global CE app related apis */ -int gk20a_init_ce_support(struct gk20a *g); -void gk20a_ce_suspend(struct gk20a *g); -void gk20a_ce_destroy(struct gk20a *g); +int nvgpu_ce_init_support(struct gk20a *g); +void nvgpu_ce_suspend(struct gk20a *g); +void nvgpu_ce_destroy(struct gk20a *g); /* CE app utility functions */ -u32 gk20a_ce_create_context(struct gk20a *g, +u32 nvgpu_ce_create_context(struct gk20a *g, u32 runlist_id, int timeslice, int runlist_level); -int gk20a_ce_execute_ops(struct gk20a *g, +void nvgpu_ce_delete_context(struct gk20a *g, + u32 ce_ctx_id); +int nvgpu_ce_execute_ops(struct gk20a *g, u32 ce_ctx_id, u64 src_buf, u64 dst_buf, @@ -136,18 +91,5 @@ int gk20a_ce_execute_ops(struct gk20a *g, u32 request_operation, u32 submit_flags, struct nvgpu_fence_type **fence_out); -void gk20a_ce_delete_context_priv(struct gk20a *g, - u32 ce_ctx_id); -void gk20a_ce_delete_context(struct gk20a *g, - u32 ce_ctx_id); -u32 gk20a_ce_prepare_submit(u64 src_buf, - u64 dst_buf, - u64 size, - u32 *cmd_buf_cpu_va, - u32 max_cmd_buf_size, - unsigned int payload, - u32 launch_flags, - u32 request_operation, - u32 dma_copy_class); -#endif /*NVGPU_GK20A_CE2_GK20A_H*/ +#endif /*NVGPU_CE_H*/ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 1061eb350..286c7147c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -29,7 +29,7 @@ struct fifo_gk20a; struct channel_gk20a; struct nvgpu_gr; struct sim_nvgpu; -struct gk20a_ce_app; +struct nvgpu_ce_app; struct gk20a_ctxsw_trace; struct nvgpu_mem_alloc_tracker; struct dbg_profiler_object_data; @@ -268,7 +268,7 @@ struct gpu_ops { u32 hw_id, u32 inst, u32 err_id, u32 status); } err_ops; - } ce2; + } ce; struct { void (*access_smpc_reg)(struct gk20a *g, u32 quad, u32 offset); int (*handle_sw_method)(struct gk20a *g, u32 addr, @@ -2218,7 +2218,7 @@ struct gk20a { nvgpu_atomic_t clk_arb_global_nr; - 
struct gk20a_ce_app *ce_app; + struct nvgpu_ce_app *ce_app; bool ltc_intr_en_illegal_compstat; diff --git a/drivers/gpu/nvgpu/os/linux/debug.c b/drivers/gpu/nvgpu/os/linux/debug.c index 533e083b0..3d79c6536 100644 --- a/drivers/gpu/nvgpu/os/linux/debug.c +++ b/drivers/gpu/nvgpu/os/linux/debug.c @@ -415,7 +415,7 @@ void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink) #ifdef CONFIG_NVGPU_SUPPORT_CDE gk20a_cde_debugfs_init(g); #endif - gk20a_ce_debugfs_init(g); + nvgpu_ce_debugfs_init(g); nvgpu_alloc_debugfs_init(g); nvgpu_hal_debugfs_init(g); gk20a_fifo_debugfs_init(g); diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.c b/drivers/gpu/nvgpu/os/linux/debug_ce.c index 69124886c..27c82a4b6 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_ce.c +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -15,11 +15,13 @@ #include "debug_ce.h" #include "os_linux.h" -#include "gk20a/ce2_gk20a.h" +#include + +#include #include -void gk20a_ce_debugfs_init(struct gk20a *g) +void nvgpu_ce_debugfs_init(struct gk20a *g) { struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); diff --git a/drivers/gpu/nvgpu/os/linux/debug_ce.h b/drivers/gpu/nvgpu/os/linux/debug_ce.h index 2a8750c4f..fd1a83a21 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_ce.h +++ b/drivers/gpu/nvgpu/os/linux/debug_ce.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017 NVIDIA Corporation. All rights reserved. + * Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved. 
* * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -16,6 +16,6 @@ #define __NVGPU_DEBUG_CE_H__ struct gk20a; -void gk20a_ce_debugfs_init(struct gk20a *g); +void nvgpu_ce_debugfs_init(struct gk20a *g); #endif /* __NVGPU_DEBUG_CE_H__ */