diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 25545f29b..87199316d 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -45,6 +45,7 @@ nvgpu-y := \
 	common/linux/vm.o \
 	common/linux/intr.o \
 	common/linux/sysfs.o \
+	common/linux/cde.o \
 	common/mm/nvgpu_allocator.o \
 	common/mm/bitmap_allocator.o \
 	common/mm/buddy_allocator.o \
@@ -92,7 +93,6 @@ nvgpu-y := \
 	gk20a/ltc_gk20a.o \
 	gk20a/fb_gk20a.o \
 	gk20a/hal.o \
-	gk20a/cde_gk20a.o \
 	gk20a/tsg_gk20a.o \
 	gk20a/ctxsw_trace_gk20a.o \
 	gk20a/fecs_trace_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/common/linux/cde.c
similarity index 89%
rename from drivers/gpu/nvgpu/gk20a/cde_gk20a.c
rename to drivers/gpu/nvgpu/common/linux/cde.c
index 506207f2d..5b0fb9102 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -31,13 +31,14 @@
 #include
 #include
 
-#include "gk20a.h"
-#include "channel_gk20a.h"
-#include "mm_gk20a.h"
-#include "cde_gk20a.h"
-#include "fence_gk20a.h"
-#include "gr_gk20a.h"
-#include "common/linux/os_linux.h"
+#include "gk20a/gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/mm_gk20a.h"
+#include "gk20a/fence_gk20a.h"
+#include "gk20a/gr_gk20a.h"
+
+#include "cde.h"
+#include "os_linux.h"
 
 #include
 #include
@@ -49,7 +50,7 @@
 #include "common/linux/vm_priv.h"
 
 static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx);
-static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g);
+static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l);
 
 #define CTX_DELETE_TIME 1000
 
@@ -65,7 +66,7 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 		nvgpu_dma_unmap_free(cde_ctx->vm, mem);
 	}
 
-	nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
+	nvgpu_kfree(&cde_ctx->l->g, cde_ctx->init_convert_cmd);
 
 	cde_ctx->convert_cmd = NULL;
 	cde_ctx->init_convert_cmd = NULL;
@@ -79,7 +80,8 @@ static void gk20a_deinit_cde_img(struct gk20a_cde_ctx *cde_ctx)
 static void gk20a_cde_remove_ctx(struct gk20a_cde_ctx *cde_ctx)
 __must_hold(&cde_app->mutex)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct channel_gk20a *ch = cde_ctx->ch;
 	struct vm_gk20a *vm = ch->vm;
@@ -95,7 +97,7 @@ __must_hold(&cde_app->mutex)
 
 	/* housekeeping on app */
 	nvgpu_list_del(&cde_ctx->list);
-	cde_ctx->g->cde_app.ctx_count--;
+	l->cde_app.ctx_count--;
 	nvgpu_kfree(g, cde_ctx);
 }
@@ -104,7 +106,7 @@ static void gk20a_cde_cancel_deleter(struct gk20a_cde_ctx *cde_ctx,
 __releases(&cde_app->mutex)
 __acquires(&cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
 
 	/* permanent contexts do not have deleter works */
 	if (!cde_ctx->is_temporary)
@@ -119,10 +121,10 @@ __acquires(&cde_app->mutex)
 	}
 }
 
-static void gk20a_cde_remove_contexts(struct gk20a *g)
-__must_hold(&cde_app->mutex)
+static void gk20a_cde_remove_contexts(struct nvgpu_os_linux *l)
+__must_hold(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
 
 	/* safe to go off the mutex in cancel_deleter since app is
@@ -142,38 +144,38 @@ __must_hold(&cde_app->mutex)
 	}
 }
 
-static void gk20a_cde_stop(struct gk20a *g)
-__must_hold(&cde_app->mutex)
+static void gk20a_cde_stop(struct nvgpu_os_linux *l)
+__must_hold(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 
 	/* prevent further conversions and delayed works from working */
 	cde_app->initialised = false;
 
 	/* free all data, empty the list */
-	gk20a_cde_remove_contexts(g);
+	gk20a_cde_remove_contexts(l);
 }
 
-void gk20a_cde_destroy(struct gk20a *g)
-__acquires(&cde_app->mutex)
-__releases(&cde_app->mutex)
+void gk20a_cde_destroy(struct nvgpu_os_linux *l)
+__acquires(&l->cde_app->mutex)
+__releases(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 
 	if (!cde_app->initialised)
 		return;
 
 	nvgpu_mutex_acquire(&cde_app->mutex);
-	gk20a_cde_stop(g);
+	gk20a_cde_stop(l);
 	nvgpu_mutex_release(&cde_app->mutex);
 
 	nvgpu_mutex_destroy(&cde_app->mutex);
 }
 
-void gk20a_cde_suspend(struct gk20a *g)
-__acquires(&cde_app->mutex)
-__releases(&cde_app->mutex)
+void gk20a_cde_suspend(struct nvgpu_os_linux *l)
+__acquires(&l->cde_app->mutex)
+__releases(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	struct gk20a_cde_ctx *cde_ctx, *cde_ctx_save;
 
 	if (!cde_app->initialised)
@@ -195,13 +197,13 @@ __releases(&cde_app->mutex)
 
 }
 
-static int gk20a_cde_create_context(struct gk20a *g)
-__must_hold(&cde_app->mutex)
+static int gk20a_cde_create_context(struct nvgpu_os_linux *l)
+__must_hold(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	struct gk20a_cde_ctx *cde_ctx;
 
-	cde_ctx = gk20a_cde_allocate_context(g);
+	cde_ctx = gk20a_cde_allocate_context(l);
 	if (IS_ERR(cde_ctx))
 		return PTR_ERR(cde_ctx);
@@ -213,21 +215,21 @@ __must_hold(&cde_app->mutex)
 	return 0;
 }
 
-static int gk20a_cde_create_contexts(struct gk20a *g)
-__must_hold(&g->cde_app->mutex)
+static int gk20a_cde_create_contexts(struct nvgpu_os_linux *l)
+__must_hold(&l->cde_app->mutex)
 {
 	int err;
 	int i;
 
 	for (i = 0; i < NUM_CDE_CONTEXTS; i++) {
-		err = gk20a_cde_create_context(g);
+		err = gk20a_cde_create_context(l);
 		if (err)
 			goto out;
 	}
 
 	return 0;
 out:
-	gk20a_cde_remove_contexts(g);
+	gk20a_cde_remove_contexts(l);
 	return err;
 }
 
@@ -236,7 +238,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
 			      struct gk20a_cde_hdr_buf *buf)
 {
 	struct nvgpu_mem *mem;
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	int err;
 
 	/* check that the file can hold the buf */
@@ -276,7 +279,8 @@ static int gk20a_init_cde_buf(struct gk20a_cde_ctx *cde_ctx,
 static int gk20a_replace_data(struct gk20a_cde_ctx *cde_ctx, void *target,
 			      int type, s32 shift, u64 mask, u64 value)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	u32 *target_mem_ptr = target;
 	u64 *target_mem_ptr_u64 = target;
 	u64 current_value, new_value;
@@ -325,7 +329,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
 {
 	struct nvgpu_mem *source_mem;
 	struct nvgpu_mem *target_mem;
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	u32 *target_mem_ptr;
 	u64 vaddr;
 	int err;
@@ -373,7 +378,8 @@ static int gk20a_init_cde_replace(struct gk20a_cde_ctx *cde_ctx,
 
 static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct nvgpu_mem *target_mem;
 	u32 *target_mem_ptr;
 	u64 new_data;
@@ -464,7 +470,8 @@ static int gk20a_init_cde_param(struct gk20a_cde_ctx *cde_ctx,
 				struct gk20a_cde_hdr_param *param)
 {
 	struct nvgpu_mem *target_mem;
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 
 	if (param->target_buf >= cde_ctx->num_bufs) {
 		nvgpu_warn(g, "cde: invalid buffer parameter. param idx = %d, target_buf=%u, num_bufs=%u",
@@ -506,7 +513,8 @@ static int gk20a_init_cde_required_class(struct gk20a_cde_ctx *cde_ctx,
 					 struct nvgpu_firmware *img,
 					 u32 required_class)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct nvgpu_alloc_obj_ctx_args alloc_obj_ctx;
 	int err;
@@ -532,7 +540,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 				  struct gk20a_cde_cmd_elem *cmd_elem,
 				  u32 num_elems)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct nvgpu_gpfifo **gpfifo, *gpfifo_elem;
 	u32 *num_entries;
 	unsigned int i;
@@ -551,7 +560,7 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 	}
 
 	/* allocate gpfifo entries to be pushed */
-	*gpfifo = nvgpu_kzalloc(cde_ctx->g,
+	*gpfifo = nvgpu_kzalloc(g,
 				sizeof(struct nvgpu_gpfifo) * num_elems);
 	if (!*gpfifo) {
 		nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
@@ -596,7 +605,8 @@ static int gk20a_init_cde_command(struct gk20a_cde_ctx *cde_ctx,
 
 static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	unsigned long init_bytes = cde_ctx->init_cmd_num_entries *
 		sizeof(struct nvgpu_gpfifo);
 	unsigned long conv_bytes = cde_ctx->convert_cmd_num_entries *
@@ -605,7 +615,7 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
 	struct nvgpu_gpfifo *combined_cmd;
 
 	/* allocate buffer that has space for both */
-	combined_cmd = nvgpu_kzalloc(cde_ctx->g, total_bytes);
+	combined_cmd = nvgpu_kzalloc(g, total_bytes);
 	if (!combined_cmd) {
 		nvgpu_warn(g, "cde: could not allocate memory for gpfifo entries");
@@ -617,8 +627,8 @@ static int gk20a_cde_pack_cmdbufs(struct gk20a_cde_ctx *cde_ctx)
 	memcpy(combined_cmd + cde_ctx->init_cmd_num_entries,
 			cde_ctx->convert_cmd, conv_bytes);
 
-	nvgpu_kfree(cde_ctx->g, cde_ctx->init_convert_cmd);
-	nvgpu_kfree(cde_ctx->g, cde_ctx->convert_cmd);
+	nvgpu_kfree(g, cde_ctx->init_convert_cmd);
+	nvgpu_kfree(g, cde_ctx->convert_cmd);
 
 	cde_ctx->init_convert_cmd = combined_cmd;
 	cde_ctx->convert_cmd = combined_cmd
@@ -630,8 +640,9 @@ static int gk20a_init_cde_img(struct gk20a_cde_ctx *cde_ctx,
 			      struct nvgpu_firmware *img)
 {
-	struct gk20a *g = cde_ctx->g;
-	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	u32 *data = (u32 *)img->data;
 	u32 num_of_elems;
 	struct gk20a_cde_hdr_elem *elem;
@@ -724,7 +735,8 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
 				    u32 op, struct nvgpu_fence *fence,
 				    u32 flags, struct gk20a_fence **fence_out)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct nvgpu_gpfifo *gpfifo = NULL;
 	int num_entries = 0;
@@ -756,7 +768,7 @@ static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
 __acquires(&cde_app->mutex)
 __releases(&cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
+	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
 
 	gk20a_dbg(gpu_dbg_cde_ctx, "releasing use on %p", cde_ctx);
 	trace_gk20a_cde_release(cde_ctx);
@@ -781,8 +793,9 @@ __releases(&cde_app->mutex)
 	struct delayed_work *delay_work = to_delayed_work(work);
 	struct gk20a_cde_ctx *cde_ctx = container_of(delay_work,
 			struct gk20a_cde_ctx, ctx_deleter_work);
-	struct gk20a_cde_app *cde_app = &cde_ctx->g->cde_app;
-	struct gk20a *g = cde_ctx->g;
+	struct gk20a_cde_app *cde_app = &cde_ctx->l->cde_app;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	int err;
 
 	/* someone has just taken it? engine deletion started? */
@@ -823,10 +836,11 @@ out:
 	gk20a_idle(g);
 }
 
-static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct gk20a *g)
+static struct gk20a_cde_ctx *gk20a_cde_do_get_context(struct nvgpu_os_linux *l)
 __must_hold(&cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a *g = &l->g;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	struct gk20a_cde_ctx *cde_ctx;
 
 	/* exhausted? */
@@ -862,7 +876,7 @@ __must_hold(&cde_app->mutex)
 			"cde: no free contexts, count=%d",
 			cde_app->ctx_count);
 
-	cde_ctx = gk20a_cde_allocate_context(g);
+	cde_ctx = gk20a_cde_allocate_context(l);
 	if (IS_ERR(cde_ctx)) {
 		nvgpu_warn(g, "cde: cannot allocate context: %ld",
 				PTR_ERR(cde_ctx));
@@ -881,11 +895,12 @@ __must_hold(&cde_app->mutex)
 	return cde_ctx;
 }
 
-static struct gk20a_cde_ctx *gk20a_cde_get_context(struct gk20a *g)
+static struct gk20a_cde_ctx *gk20a_cde_get_context(struct nvgpu_os_linux *l)
 __releases(&cde_app->mutex)
 __acquires(&cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a *g = &l->g;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct nvgpu_timeout timeout;
@@ -893,7 +908,7 @@ __acquires(&cde_app->mutex)
 			   NVGPU_TIMER_CPU_TIMER);
 
 	do {
-		cde_ctx = gk20a_cde_do_get_context(g);
+		cde_ctx = gk20a_cde_do_get_context(l);
 		if (PTR_ERR(cde_ctx) != -EAGAIN)
 			break;
@@ -906,8 +921,9 @@ __acquires(&cde_app->mutex)
 	return cde_ctx;
 }
 
-static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
+static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l)
 {
+	struct gk20a *g = &l->g;
 	struct gk20a_cde_ctx *cde_ctx;
 	int ret;
@@ -915,7 +931,7 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 	if (!cde_ctx)
 		return ERR_PTR(-ENOMEM);
 
-	cde_ctx->g = g;
+	cde_ctx->l = l;
 	cde_ctx->dev = dev_from_gk20a(g);
 
 	ret = gk20a_cde_load(cde_ctx);
@@ -935,16 +951,17 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g)
 	return cde_ctx;
 }
 
-int gk20a_cde_convert(struct gk20a *g,
+int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		      struct dma_buf *compbits_scatter_buf,
 		      u64 compbits_byte_offset,
 		      u64 scatterbuffer_byte_offset,
 		      struct nvgpu_fence *fence,
 		      u32 __flags, struct gk20a_cde_param *params,
 		      int num_params, struct gk20a_fence **fence_out)
-__acquires(&cde_app->mutex)
-__releases(&cde_app->mutex)
+__acquires(&l->cde_app->mutex)
+__releases(&l->cde_app->mutex)
 {
+	struct gk20a *g = &l->g;
 	struct gk20a_cde_ctx *cde_ctx = NULL;
 	struct gk20a_comptags comptags;
 	u64 mapped_compbits_offset = 0;
@@ -972,9 +989,9 @@ __releases(&cde_app->mutex)
 	if (err)
 		return err;
 
-	nvgpu_mutex_acquire(&g->cde_app.mutex);
-	cde_ctx = gk20a_cde_get_context(g);
-	nvgpu_mutex_release(&g->cde_app.mutex);
+	nvgpu_mutex_acquire(&l->cde_app.mutex);
+	cde_ctx = gk20a_cde_get_context(l);
+	nvgpu_mutex_release(&l->cde_app.mutex);
 	if (IS_ERR(cde_ctx)) {
 		err = PTR_ERR(cde_ctx);
 		goto exit_idle;
@@ -1158,8 +1175,9 @@ __acquires(&cde_app->mutex)
 __releases(&cde_app->mutex)
 {
 	struct gk20a_cde_ctx *cde_ctx = data;
-	struct gk20a *g = cde_ctx->g;
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	bool channel_idle;
 
 	channel_gk20a_joblist_lock(ch);
@@ -1188,7 +1206,7 @@ __releases(&cde_app->mutex)
 		/* mark it to be deleted, replace with a new one */
 		nvgpu_mutex_acquire(&cde_app->mutex);
 		cde_ctx->is_temporary = true;
-		if (gk20a_cde_create_context(g)) {
+		if (gk20a_cde_create_context(l)) {
 			nvgpu_err(g, "cde: can't replace context");
 		}
 		nvgpu_mutex_release(&cde_app->mutex);
@@ -1208,7 +1226,8 @@ __releases(&cde_app->mutex)
 
 static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 {
-	struct gk20a *g = cde_ctx->g;
+	struct nvgpu_os_linux *l = cde_ctx->l;
+	struct gk20a *g = &l->g;
 	struct nvgpu_firmware *img;
 	struct channel_gk20a *ch;
 	struct gr_gk20a *gr = &g->gr;
@@ -1288,11 +1307,12 @@ err_get_gk20a_channel:
 	return err;
 }
 
-int gk20a_cde_reload(struct gk20a *g)
-__acquires(&cde_app->mutex)
-__releases(&cde_app->mutex)
+int gk20a_cde_reload(struct nvgpu_os_linux *l)
+__acquires(&l->cde_app->mutex)
+__releases(&l->cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a *g = &l->g;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	int err;
 
 	if (!cde_app->initialised)
@@ -1304,9 +1324,9 @@ __releases(&cde_app->mutex)
 
 	nvgpu_mutex_acquire(&cde_app->mutex);
 
-	gk20a_cde_stop(g);
+	gk20a_cde_stop(l);
 
-	err = gk20a_cde_create_contexts(g);
+	err = gk20a_cde_create_contexts(l);
 	if (!err)
 		cde_app->initialised = true;
@@ -1316,11 +1336,11 @@ __releases(&cde_app->mutex)
 	return err;
 }
 
-int gk20a_init_cde_support(struct gk20a *g)
+int gk20a_init_cde_support(struct nvgpu_os_linux *l)
 __acquires(&cde_app->mutex)
 __releases(&cde_app->mutex)
 {
-	struct gk20a_cde_app *cde_app = &g->cde_app;
+	struct gk20a_cde_app *cde_app = &l->cde_app;
 	int err;
 
 	if (cde_app->initialised)
@@ -1340,7 +1360,7 @@ __releases(&cde_app->mutex)
 	cde_app->ctx_count_top = 0;
 	cde_app->ctx_usecount = 0;
 
-	err = gk20a_cde_create_contexts(g);
+	err = gk20a_cde_create_contexts(l);
 	if (!err)
 		cde_app->initialised = true;
@@ -1393,7 +1413,7 @@ enum cde_launch_patch_id {
 #define MAX_CDE_LAUNCH_PATCHES 32
 
 static int gk20a_buffer_convert_gpu_to_cde_v1(
-		struct gk20a *g,
+		struct nvgpu_os_linux *l,
 		struct dma_buf *dmabuf, u32 consumer,
 		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
 		u64 scatterbuffer_offset,
@@ -1401,6 +1421,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		u32 submit_flags, struct nvgpu_fence *fence_in,
 		struct gk20a_buffer_state *state)
 {
+	struct gk20a *g = &l->g;
 	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
 	int param = 0;
 	int err = 0;
@@ -1426,6 +1447,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 	if (g->ops.cde.get_program_numbers)
 		g->ops.cde.get_program_numbers(g, block_height_log2,
+					       l->cde_app.shader_parameter,
 					       &hprog, &vprog);
 	else {
 		nvgpu_warn(g, "cde: chip not supported");
@@ -1450,11 +1472,11 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		  wgx, wgy, gridw_h, gridh_h, gridw_v, gridh_v);
 	gk20a_dbg(gpu_dbg_cde, "hprog=%d, offset=0x%x, regs=%d, vprog=%d, offset=0x%x, regs=%d",
 		  hprog,
-		  g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
-		  g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
+		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog],
+		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog],
 		  vprog,
-		  g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
-		  g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
+		  l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog],
+		  l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
 
 	/* Write parameters */
 #define WRITE_PATCH(NAME, VALUE) \
@@ -1483,40 +1505,40 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 	WRITE_PATCH(PATCH_V_VPC_CURRENT_GRID_SIZE_Z, 1);
 
 	WRITE_PATCH(PATCH_H_QMD_PROGRAM_OFFSET,
-		g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
+		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][hprog]);
 	WRITE_PATCH(PATCH_H_QMD_REGISTER_COUNT,
-		g->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
+		l->cde_app.arrays[ARRAY_REGISTER_COUNT][hprog]);
 	WRITE_PATCH(PATCH_V_QMD_PROGRAM_OFFSET,
-		g->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
+		l->cde_app.arrays[ARRAY_PROGRAM_OFFSET][vprog]);
 	WRITE_PATCH(PATCH_V_QMD_REGISTER_COUNT,
-		g->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
+		l->cde_app.arrays[ARRAY_REGISTER_COUNT][vprog]);
 
 	if (consumer & NVGPU_GPU_COMPBITS_CDEH) {
 		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
 		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
 	} else {
 		WRITE_PATCH(PATCH_H_LAUNCH_WORD1,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
 		WRITE_PATCH(PATCH_H_LAUNCH_WORD2,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
 	}
 
 	if (consumer & NVGPU_GPU_COMPBITS_CDEV) {
 		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][0]);
 		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][1]);
 	} else {
 		WRITE_PATCH(PATCH_V_LAUNCH_WORD1,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][2]);
 		WRITE_PATCH(PATCH_V_LAUNCH_WORD2,
-			g->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
+			l->cde_app.arrays[ARRAY_LAUNCH_COMMAND][3]);
 	}
 #undef WRITE_PATCH
 
-	err = gk20a_cde_convert(g, dmabuf,
+	err = gk20a_cde_convert(l, dmabuf,
 				compbits_hoffset, scatterbuffer_offset,
 				fence_in, submit_flags,
@@ -1534,30 +1556,31 @@ out:
 }
 
 static int gk20a_buffer_convert_gpu_to_cde(
-		struct gk20a *g, struct dma_buf *dmabuf, u32 consumer,
+		struct nvgpu_os_linux *l, struct dma_buf *dmabuf, u32 consumer,
 		u64 offset, u64 compbits_hoffset, u64 compbits_voffset,
 		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_fence *fence_in,
 		struct gk20a_buffer_state *state)
 {
+	struct gk20a *g = &l->g;
 	int err = 0;
 
-	if (!g->cde_app.initialised)
+	if (!l->cde_app.initialised)
 		return -ENOSYS;
 
 	gk20a_dbg(gpu_dbg_cde, "firmware version = %d\n",
-		  g->cde_app.firmware_version);
+		  l->cde_app.firmware_version);
 
-	if (g->cde_app.firmware_version == 1) {
+	if (l->cde_app.firmware_version == 1) {
 		err = gk20a_buffer_convert_gpu_to_cde_v1(
-			g, dmabuf, consumer, offset, compbits_hoffset,
+			l, dmabuf, consumer, offset, compbits_hoffset,
 			compbits_voffset, scatterbuffer_offset,
 			width, height, block_height_log2,
 			submit_flags, fence_in, state);
 	} else {
 		nvgpu_err(g, "unsupported CDE firmware version %d",
-			  g->cde_app.firmware_version);
+			  l->cde_app.firmware_version);
 		err = -EINVAL;
 	}
@@ -1565,7 +1588,7 @@ static int gk20a_buffer_convert_gpu_to_cde(
 }
 
 int gk20a_prepare_compressible_read(
-		struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
+		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
 		u64 compbits_hoffset, u64 compbits_voffset,
 		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
@@ -1573,6 +1596,7 @@ int gk20a_prepare_compressible_read(
 		u32 *valid_compbits, u32 *zbc_color,
 		struct gk20a_fence **fence_out)
 {
+	struct gk20a *g = &l->g;
 	int err = 0;
 	struct gk20a_buffer_state *state;
 	struct dma_buf *dmabuf;
@@ -1606,7 +1630,7 @@ int gk20a_prepare_compressible_read(
 	if ((state->valid_compbits & NVGPU_GPU_COMPBITS_GPU) &&
 	    missing_cde_bits) {
 		err = gk20a_buffer_convert_gpu_to_cde(
-				g, dmabuf,
+				l, dmabuf,
 				missing_cde_bits, offset,
 				compbits_hoffset, compbits_voffset,
 				scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h b/drivers/gpu/nvgpu/common/linux/cde.h
similarity index 95%
rename from drivers/gpu/nvgpu/gk20a/cde_gk20a.h
rename to drivers/gpu/nvgpu/common/linux/cde.h
index 4f400bf3b..22732a2af 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.h
+++ b/drivers/gpu/nvgpu/common/linux/cde.h
@@ -19,8 +19,6 @@
 #ifndef _CDE_GK20A_H_
 #define _CDE_GK20A_H_
 
-#include "mm_gk20a.h"
-
 #define MAX_CDE_BUFS 10
 #define MAX_CDE_PARAMS 64
 #define MAX_CDE_USER_PARAMS 40
@@ -214,7 +212,7 @@ struct gk20a_cde_param {
 };
 
 struct gk20a_cde_ctx {
-	struct gk20a *g;
+	struct nvgpu_os_linux *l;
 	struct device *dev;
 
 	/* channel related data */
@@ -284,11 +282,11 @@ struct gk20a_cde_app {
 	u32 shader_parameter;
 };
 
-void gk20a_cde_destroy(struct gk20a *g);
-void gk20a_cde_suspend(struct gk20a *g);
-int gk20a_init_cde_support(struct gk20a *g);
-int gk20a_cde_reload(struct gk20a *g);
-int gk20a_cde_convert(struct gk20a *g,
+void gk20a_cde_destroy(struct nvgpu_os_linux *l);
+void gk20a_cde_suspend(struct nvgpu_os_linux *l);
+int gk20a_init_cde_support(struct nvgpu_os_linux *l);
+int gk20a_cde_reload(struct nvgpu_os_linux *l);
+int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		struct dma_buf *compbits_buf,
 		u64 compbits_byte_offset,
 		u64 scatterbuffer_byte_offset,
@@ -297,7 +295,7 @@ int gk20a_cde_convert(struct gk20a *g,
 		int num_params, struct gk20a_fence **fence_out);
 int gk20a_prepare_compressible_read(
-		struct gk20a *g, u32 buffer_fd, u32 request, u64 offset,
+		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
 		u64 compbits_hoffset, u64 compbits_voffset,
 		u64 scatterbuffer_offset,
 		u32 width, u32 height, u32 block_height_log2,
diff --git a/drivers/gpu/nvgpu/common/linux/debug_cde.c b/drivers/gpu/nvgpu/common/linux/debug_cde.c
index 40cc64a4f..cbea83b96 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_cde.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_cde.c
@@ -22,8 +22,8 @@
 static ssize_t gk20a_cde_reload_write(struct file *file,
 	const char __user *userbuf, size_t count, loff_t *ppos)
 {
-	struct gk20a *g = file->private_data;
-	gk20a_cde_reload(g);
+	struct nvgpu_os_linux *l = file->private_data;
+	gk20a_cde_reload(l);
 	return count;
 }
 
@@ -41,13 +41,13 @@ void gk20a_cde_debugfs_init(struct gk20a *g)
 		return;
 
 	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
-			   l->debugfs, &g->cde_app.shader_parameter);
+			   l->debugfs, &l->cde_app.shader_parameter);
 	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
-			   l->debugfs, &g->cde_app.ctx_count);
+			   l->debugfs, &l->cde_app.ctx_count);
 	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
-			   l->debugfs, &g->cde_app.ctx_usecount);
+			   l->debugfs, &l->cde_app.ctx_usecount);
 	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
-			   l->debugfs, &g->cde_app.ctx_count_top);
+			   l->debugfs, &l->cde_app.ctx_count_top);
 	debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
-			    g, &gk20a_cde_reload_fops);
+			    l, &gk20a_cde_reload_fops);
 }
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 0d79b1431..0357f098f 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -138,6 +138,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 	struct gk20a *g,
 	struct nvgpu_gpu_prepare_compressible_read_args *args)
 {
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct nvgpu_fence fence;
 	struct gk20a_fence *fence_out = NULL;
 	int ret = 0;
@@ -146,7 +147,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 	fence.id = args->fence.syncpt_id;
 	fence.value = args->fence.syncpt_value;
 
-	ret = gk20a_prepare_compressible_read(g, args->handle,
+	ret = gk20a_prepare_compressible_read(l, args->handle,
 			args->request_compbits, args->offset,
 			args->compbits_hoffset, args->compbits_voffset,
 			args->scatterbuffer_offset,
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index 6a590baaa..509930c77 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -39,6 +39,7 @@
 #include "pci.h"
 #include "module.h"
 #include "intr.h"
+#include "cde.h"
 #ifdef CONFIG_TEGRA_19x_GPU
 #include "nvgpu_gpuid_t19x.h"
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -185,7 +186,7 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 	gk20a_scale_resume(dev_from_gk20a(g));
 
 	if (platform->has_cde)
-		gk20a_init_cde_support(g);
+		gk20a_init_cde_support(l);
 
 done:
 	if (err)
@@ -197,6 +198,7 @@ done:
 static int gk20a_pm_prepare_poweroff(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	int ret = 0;
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
@@ -207,8 +209,15 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 	if (!g->power_on)
 		goto done;
 
+	if (gk20a_fifo_is_engine_busy(g)) {
+		ret = -EBUSY;
+		goto done;
+	}
+
 	gk20a_scale_suspend(dev);
 
+	gk20a_cde_suspend(l);
+
 	ret = gk20a_prepare_poweroff(g);
 	if (ret)
 		goto error;
@@ -974,6 +983,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct gk20a *g = get_gk20a(dev);
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
 
 	gk20a_dbg_fn("");
@@ -982,7 +992,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 		return vgpu_remove(pdev);
 
 	if (platform->has_cde)
-		gk20a_cde_destroy(g);
+		gk20a_cde_destroy(l);
 
 	gk20a_ctxsw_trace_cleanup(g);
diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h
index ed8364a93..160a5738d 100644
--- a/drivers/gpu/nvgpu/common/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/os_linux.h
@@ -19,6 +19,7 @@
 #include
 
 #include "gk20a/gk20a.h"
+#include "cde.h"
 
 struct nvgpu_os_linux {
 	struct gk20a g;
@@ -108,6 +109,7 @@ struct nvgpu_os_linux {
 	struct dentry *debugfs_force_preemption_gfxp;
 	struct dentry *debugfs_dump_ctxsw_stats;
 #endif
+	struct gk20a_cde_app cde_app;
 };
 
 static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 0cd314d6f..63ea5bc46 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -117,9 +117,6 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 	if (gk20a_fifo_is_engine_busy(g))
 		return -EBUSY;
 
-	/* cancel any pending cde work */
-	gk20a_cde_suspend(g);
-
 	gk20a_ce_suspend(g);
 
 	ret = gk20a_channel_suspend(g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index ab715bdc4..69cb22531 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -60,7 +60,6 @@ struct nvgpu_cpu_time_correlation_sample;
 #include "pmu_gk20a.h"
 #include "priv_ring_gk20a.h"
 #include "therm_gk20a.h"
-#include "cde_gk20a.h"
 #include "sched_gk20a.h"
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 #include "clk/clk.h"
@@ -928,6 +927,7 @@ struct gpu_ops {
 	struct {
 		void (*get_program_numbers)(struct gk20a *g,
 					    u32 block_height_log2,
+					    u32 shader_parameter,
 					    int *hprog, int *vprog);
 		bool (*need_scatter_buffer)(struct gk20a *g);
 		int (*populate_scatter_buffer)(struct gk20a *g,
@@ -1217,7 +1217,6 @@ struct gk20a {
 
 	struct gk20a_sched_ctrl sched_ctrl;
 
-	struct gk20a_cde_app cde_app;
 	bool mmu_debug_ctrl;
 	u32 tpc_fs_mask_user;
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
index f8267d1dd..de7cf8729 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.c
@@ -28,15 +28,16 @@ enum programs {
 void gm20b_cde_get_program_numbers(struct gk20a *g,
 				   u32 block_height_log2,
+				   u32 shader_parameter,
 				   int *hprog_out, int *vprog_out)
 {
 	int hprog = PROG_HPASS;
 	int vprog = (block_height_log2 >= 2) ?
 		PROG_VPASS_LARGE : PROG_VPASS_SMALL;
-	if (g->cde_app.shader_parameter == 1) {
+	if (shader_parameter == 1) {
 		hprog = PROG_PASSTHROUGH;
 		vprog = PROG_PASSTHROUGH;
-	} else if (g->cde_app.shader_parameter == 2) {
+	} else if (shader_parameter == 2) {
 		hprog = PROG_HPASS_DEBUG;
 		vprog = (block_height_log2 >= 2) ?
 			PROG_VPASS_LARGE_DEBUG :
diff --git a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
index f2ea20a08..0ea423adc 100644
--- a/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/cde_gm20b.h
@@ -20,6 +20,7 @@ struct gk20a;
 
 void gm20b_cde_get_program_numbers(struct gk20a *g,
 				   u32 block_height_log2,
+				   u32 shader_parameter,
 				   int *hprog_out, int *vprog_out);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
index 685ddbc41..1ddbcba65 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.c
@@ -32,17 +32,18 @@ enum gp10b_programs {
 void gp10b_cde_get_program_numbers(struct gk20a *g,
 				   u32 block_height_log2,
+				   u32 shader_parameter,
 				   int *hprog_out, int *vprog_out)
 {
 	int hprog, vprog;
 
-	if (g->cde_app.shader_parameter == 1) {
+	if (shader_parameter == 1) {
 		hprog = GP10B_PROG_PASSTHROUGH;
 		vprog = GP10B_PROG_PASSTHROUGH;
 	} else {
 		hprog = GP10B_PROG_HPASS;
 		vprog = GP10B_PROG_VPASS;
-		if (g->cde_app.shader_parameter == 2) {
+		if (shader_parameter == 2) {
 			hprog = GP10B_PROG_HPASS_DEBUG;
 			vprog = GP10B_PROG_VPASS_DEBUG;
 		}
diff --git a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
index 3ee6027c1..7ccfe5602 100644
--- a/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/cde_gp10b.h
@@ -21,6 +21,7 @@ struct sg_table;
 
 void gp10b_cde_get_program_numbers(struct gk20a *g,
 				   u32 block_height_log2,
+				   u32 shader_parameter,
 				   int *hprog_out, int *vprog_out);
 bool gp10b_need_scatter_buffer(struct gk20a *g);
 int gp10b_populate_scatter_buffer(struct gk20a *g,