diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 990972e4c..065e8ab10 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -129,28 +129,25 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c) { u32 addr_lo; u32 addr_hi; - void *inst_ptr; struct gk20a *g = c->g; gk20a_dbg_fn(""); - inst_ptr = c->inst_block.cpu_va; - if (!inst_ptr) - return -ENOMEM; - addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v()); addr_hi = u64_hi32(c->userd_iova); gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx", c->hw_chid, (u64)c->userd_iova); - gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(), + gk20a_mem_wr32(g, &c->inst_block, + ram_in_ramfc_w() + ram_fc_userd_w(), (g->mm.vidmem_is_vidmem ? pbdma_userd_target_sys_mem_ncoh_f() : pbdma_userd_target_vid_mem_f()) | pbdma_userd_addr_f(addr_lo)); - gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(), + gk20a_mem_wr32(g, &c->inst_block, + ram_in_ramfc_w() + ram_fc_userd_hi_w(), pbdma_userd_hi_addr_f(addr_hi)); return 0; @@ -186,13 +183,8 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g, static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) { - void *inst_ptr; int shift = 0, value = 0; - inst_ptr = c->inst_block.cpu_va; - if (!inst_ptr) - return -ENOMEM; - gk20a_channel_get_timescale_from_timeslice(c->g, c->timeslice_us, &value, &shift); @@ -203,7 +195,7 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c) WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid)); /* set new timeslice */ - gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), + gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(), value | (shift << 12) | fifo_runlist_timeslice_enable_true_f()); @@ -255,33 +247,30 @@ u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c) int channel_gk20a_setup_ramfc(struct channel_gk20a *c, u64 gpfifo_base, u32 gpfifo_entries, u32 flags) { - void *inst_ptr; + struct gk20a *g = c->g; + struct mem_desc *mem = &c->inst_block; gk20a_dbg_fn(""); - inst_ptr = c->inst_block.cpu_va; - if (!inst_ptr) - return -ENOMEM; + gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v()); - memset(inst_ptr, 0, ram_fc_size_val_v()); - - gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(), + gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(), pbdma_gp_base_offset_f( u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s()))); - gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(), + gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(), pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) | pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries))); - gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(), + gk20a_mem_wr32(g, mem, ram_fc_signature_w(), c->g->ops.fifo.get_pbdma_signature(c->g)); - gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(), + gk20a_mem_wr32(g, mem, ram_fc_formats_w(), pbdma_formats_gp_fermi0_f() | pbdma_formats_pb_fermi1_f() | pbdma_formats_mp_fermi0_f()); - gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(), + gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(), pbdma_pb_header_priv_user_f() | pbdma_pb_header_method_zero_f() | pbdma_pb_header_subchannel_zero_f() | @@ -289,47 +278,49 @@ int channel_gk20a_setup_ramfc(struct channel_gk20a *c, pbdma_pb_header_first_true_f() | pbdma_pb_header_type_inc_f()); - gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(), + gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(), pbdma_subdevice_id_f(1) | pbdma_subdevice_status_active_f() | pbdma_subdevice_channel_dma_enable_f()); - gk20a_mem_wr32(inst_ptr, 
ram_fc_target_w(), pbdma_target_engine_sw_f()); + gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f()); - gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(), + gk20a_mem_wr32(g, mem, ram_fc_acquire_w(), channel_gk20a_pbdma_acquire_val(c)); - gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(), + gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(), fifo_runlist_timeslice_timeout_128_f() | fifo_runlist_timeslice_timescale_3_f() | fifo_runlist_timeslice_enable_true_f()); - gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(), + gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(), fifo_pb_timeslice_timeout_16_f() | fifo_pb_timeslice_timescale_0_f() | fifo_pb_timeslice_enable_true_f()); - gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); + gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid)); return channel_gk20a_commit_userd(c); } static int channel_gk20a_setup_userd(struct channel_gk20a *c) { - BUG_ON(!c->userd_cpu_va); + struct gk20a *g = c->g; + struct mem_desc *mem = &g->fifo.userd; + u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32); gk20a_dbg_fn(""); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0); - gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0); + gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 8840a3aed..b1355f921 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -130,7 +130,6 @@ struct channel_gk20a { struct mem_desc inst_block; struct mem_desc_sub ramfc; - void *userd_cpu_va; u64 userd_iova; u64 userd_gpu_va; diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index c2285c8af..a3fa2ea53 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -36,7 +36,7 @@ unsigned int gk20a_debug_trace_cmdbuf; struct ch_state { int pid; int refs; - u8 inst_block[0]; + u32 inst_block[0]; }; static const char * const ccsr_chan_status_str[] = { @@ -108,15 +108,15 @@ static void gk20a_debug_show_channel(struct gk20a *g, u32 channel = gk20a_readl(g, ccsr_channel_r(hw_chid)); u32 status = ccsr_channel_status_v(channel); u32 syncpointa, syncpointb; - void *inst_ptr; + u32 *inst_mem; if (!ch_state) return; - inst_ptr = &ch_state->inst_block[0]; + inst_mem = &ch_state->inst_block[0]; - syncpointa = gk20a_mem_rd32(inst_ptr, 
ram_fc_syncpointa_w()); - syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); + syncpointa = inst_mem[ram_fc_syncpointa_w()]; + syncpointb = inst_mem[ram_fc_syncpointb_w()]; gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, dev_name(g->dev), @@ -129,23 +129,22 @@ static void gk20a_debug_show_channel(struct gk20a *g, gk20a_debug_output(o, "TOP: %016llx PUT: %016llx GET: %016llx " "FETCH: %016llx\nHEADER: %08x COUNT: %08x\n" "SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", - (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_top_level_get_w()) + - ((u64)gk20a_mem_rd32(inst_ptr, - ram_fc_pb_top_level_get_hi_w()) << 32ULL), - (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_w()) + - ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_put_hi_w()) << 32ULL), - (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_w()) + - ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_get_hi_w()) << 32ULL), - (u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_w()) + - ((u64)gk20a_mem_rd32(inst_ptr, ram_fc_pb_fetch_hi_w()) << 32ULL), - gk20a_mem_rd32(inst_ptr, ram_fc_pb_header_w()), - gk20a_mem_rd32(inst_ptr, ram_fc_pb_count_w()), + (u64)inst_mem[ram_fc_pb_top_level_get_w()] + + ((u64)inst_mem[ram_fc_pb_top_level_get_hi_w()] << 32ULL), + (u64)inst_mem[ram_fc_pb_put_w()] + + ((u64)inst_mem[ram_fc_pb_put_hi_w()] << 32ULL), + (u64)inst_mem[ram_fc_pb_get_w()] + + ((u64)inst_mem[ram_fc_pb_get_hi_w()] << 32ULL), + (u64)inst_mem[ram_fc_pb_fetch_w()] + + ((u64)inst_mem[ram_fc_pb_fetch_hi_w()] << 32ULL), + inst_mem[ram_fc_pb_header_w()], + inst_mem[ram_fc_pb_count_w()], syncpointa, syncpointb, - gk20a_mem_rd32(inst_ptr, ram_fc_semaphorea_w()), - gk20a_mem_rd32(inst_ptr, ram_fc_semaphoreb_w()), - gk20a_mem_rd32(inst_ptr, ram_fc_semaphorec_w()), - gk20a_mem_rd32(inst_ptr, ram_fc_semaphored_w())); + inst_mem[ram_fc_semaphorea_w()], + inst_mem[ram_fc_semaphoreb_w()], + inst_mem[ram_fc_semaphorec_w()], + inst_mem[ram_fc_semaphored_w()]); #ifdef CONFIG_TEGRA_GK20A if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) @@ -246,17 +245,15 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; - if (ch_state[chid]) { - if (ch->inst_block.cpu_va) { - ch_state[chid]->pid = ch->pid; - ch_state[chid]->refs = - atomic_read(&ch->ref_count); - memcpy(&ch_state[chid]->inst_block[0], - ch->inst_block.cpu_va, - ram_in_alloc_size_v()); - } - gk20a_channel_put(ch); - } + if (!ch_state[chid]) + continue; + + ch_state[chid]->pid = ch->pid; + ch_state[chid]->refs = atomic_read(&ch->ref_count); + gk20a_mem_rd_n(g, &ch->inst_block, 0, + &ch_state[chid]->inst_block[0], + ram_in_alloc_size_v()); + gk20a_channel_put(ch); } for (chid = 0; chid < f->num_channels; chid++) { if (ch_state[chid]) { diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index f9cddc416..edddcdc1b 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -619,7 +619,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, phys_addr_t pa; struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; struct gk20a_fecs_trace *trace = g->fecs_trace; - void *ctx_ptr; + struct mem_desc *mem = &ch_ctx->gr_ctx->mem; u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(ch); gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, @@ -634,10 +634,7 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, if (!pa) return -ENOMEM; - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - 
PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, 0, - pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + if (gk20a_mem_begin(g, mem)) return -ENOMEM; lo = u64_lo32(pa); @@ -646,18 +643,18 @@ static int gk20a_fecs_trace_bind_channel(struct gk20a *g, gk20a_dbg(gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, lo, GK20A_FECS_TRACE_NUM_RECORDS); - gk20a_mem_wr32(ctx_ptr - + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), - 0, lo); - gk20a_mem_wr32(ctx_ptr - + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), - 0, ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); - gk20a_mem_wr32(ctx_ptr - + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), - 0, ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), + lo); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), + ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi)); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), + ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( GK20A_FECS_TRACE_NUM_RECORDS)); - vunmap(ctx_ptr); + gk20a_mem_end(g, mem); gk20a_fecs_trace_hash_add(g, context_ptr, ch->pid); return 0; diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index dc3debf20..714003319 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -520,8 +520,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) mutex_init(&f->free_chs_mutex); for (chid = 0; chid < f->num_channels; chid++) { - f->channel[chid].userd_cpu_va = - f->userd.cpu_va + chid * f->userd_entry_size; f->channel[chid].userd_iova = g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) + chid * f->userd_entry_size; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index f228cce49..2f85bf96c 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -201,7 +201,7 @@ struct gpu_ops { struct gr_ctx_desc *gr_ctx); void (*update_ctxsw_preemption_mode)(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, - void *ctx_ptr); + struct mem_desc *mem); int (*update_smpc_ctxsw_mode)(struct gk20a *g, struct channel_gk20a *c, bool enable); @@ -221,7 +221,8 @@ struct gpu_ops { int (*wait_empty)(struct gk20a *g, unsigned long end_jiffies, u32 expect_delay); void (*init_cyclestats)(struct gk20a *g); - void (*enable_cde_in_fecs)(void *ctx_ptr); + void (*enable_cde_in_fecs)(struct gk20a *g, + struct mem_desc *mem); int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, u64 sms, bool enable); void (*bpt_reg_info)(struct gk20a *g, @@ -484,7 +485,7 @@ struct gpu_ops { void (*cbc_clean)(struct gk20a *g); void (*tlb_invalidate)(struct vm_gk20a *vm); void (*set_big_page_size)(struct gk20a *g, - void *inst_ptr, int size); + struct mem_desc *mem, int size); u32 (*get_big_page_sizes)(void); u32 (*get_physical_addr_bits)(struct gk20a *g); int (*init_mm_setup_hw)(struct gk20a *g); @@ -493,7 +494,8 @@ struct gpu_ops { void (*remove_bar2_vm)(struct gk20a *g); const struct gk20a_mmu_level * (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); - void (*init_pdb)(struct gk20a *g, void *inst_ptr, u64 pdb_addr); + void (*init_pdb)(struct gk20a *g, struct mem_desc *mem, + u64 pdb_addr); u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl, u32 flags); int (*bar1_bind)(struct gk20a *g, u64 bar1_iova); @@ -859,53 +861,6 @@ do { \ #define 
gk20a_dbg_info(fmt, arg...) \ gk20a_dbg(gpu_dbg_info, fmt, ##arg) -/* mem access with dbg_mem logging */ -static inline u8 gk20a_mem_rd08(void *ptr, int b) -{ - u8 _b = ((const u8 *)ptr)[b]; -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, _b); -#endif - return _b; -} -static inline u16 gk20a_mem_rd16(void *ptr, int s) -{ - u16 _s = ((const u16 *)ptr)[s]; -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, _s); -#endif - return _s; -} -static inline u32 gk20a_mem_rd32(void *ptr, int w) -{ - u32 _w = ((const u32 *)ptr)[w]; -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + sizeof(u32)*w, _w); -#endif - return _w; -} -static inline void gk20a_mem_wr08(void *ptr, int b, u8 data) -{ -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u8)*b, data); -#endif - ((u8 *)ptr)[b] = data; -} -static inline void gk20a_mem_wr16(void *ptr, int s, u16 data) -{ -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u16)*s, data); -#endif - ((u16 *)ptr)[s] = data; -} -static inline void gk20a_mem_wr32(void *ptr, int w, u32 data) -{ -#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM - gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr+sizeof(u32)*w, data); -#endif - ((u32 *)ptr)[w] = data; -} - void gk20a_init_clk_ops(struct gpu_ops *gops); /* register accessors */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4e7c36ee0..e7e6662a8 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -97,22 +97,18 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, u32 *ctx_id) { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - void *ctx_ptr = NULL; /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ g->ops.mm.l2_flush(g, true); - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) return -ENOMEM; - *ctx_id = gk20a_mem_rd32(ctx_ptr + - ctxsw_prog_main_image_context_id_o(), 0); + *ctx_id = gk20a_mem_rd(g, &ch_ctx->gr_ctx->mem, + ctxsw_prog_main_image_context_id_o()); - vunmap(ctx_ptr); + gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); return 0; } @@ -619,22 +615,17 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) { u32 addr_lo; u32 addr_hi; - void *inst_ptr = NULL; gk20a_dbg_fn(""); - inst_ptr = c->inst_block.cpu_va; - if (!inst_ptr) - return -ENOMEM; - addr_lo = u64_lo32(gpu_va) >> 12; addr_hi = u64_hi32(gpu_va); - gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_target_w(), + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_target_w(), ram_in_gr_cs_wfi_f() | ram_in_gr_wfi_mode_virtual_f() | ram_in_gr_wfi_ptr_lo_f(addr_lo)); - gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), + gk20a_mem_wr32(c->g, &c->inst_block, ram_in_gr_wfi_ptr_hi_w(), ram_in_gr_wfi_ptr_hi_f(addr_hi)); return 0; @@ -658,11 +649,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, return -EBUSY; } - ch_ctx->patch_ctx.mem.cpu_va = vmap(ch_ctx->patch_ctx.mem.pages, - PAGE_ALIGN(ch_ctx->patch_ctx.mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - - if (!ch_ctx->patch_ctx.mem.cpu_va) + if (gk20a_mem_begin(g, &ch_ctx->patch_ctx.mem)) return -ENOMEM; return 0; @@ -677,8 +664,7 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g, return -EINVAL; } - vunmap(ch_ctx->patch_ctx.mem.cpu_va); - ch_ctx->patch_ctx.mem.cpu_va = NULL; + gk20a_mem_end(g, &ch_ctx->patch_ctx.mem); return 0; } @@ -687,7 +673,6 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, u32 addr, u32 data, bool patch) { u32 patch_slot = 0; - void *patch_ptr = NULL; bool mapped_here = false; BUG_ON(patch != 0 && ch_ctx == NULL); @@ -708,11 +693,10 @@ int gr_gk20a_ctx_patch_write(struct gk20a *g, } else mapped_here = false; - patch_ptr = ch_ctx->patch_ctx.mem.cpu_va; patch_slot = ch_ctx->patch_ctx.data_count * 2; - gk20a_mem_wr32(patch_ptr, patch_slot++, addr); - gk20a_mem_wr32(patch_ptr, patch_slot++, data); + gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, addr); + gk20a_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot++, data); ch_ctx->patch_ctx.data_count++; @@ -760,16 +744,13 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; + struct mem_desc *mem = &ch_ctx->gr_ctx->mem; u32 va_lo, va_hi, va; int ret = 0; - void *ctx_ptr = NULL; gk20a_dbg_fn(""); - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + if (gk20a_mem_begin(g, mem)) return -ENOMEM; if (ch_ctx->zcull_ctx.gpu_va == 0 && @@ -792,15 +773,17 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) goto clean_up; } - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_zcull_o(), ch_ctx->zcull_ctx.ctx_sw_mode); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, va); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_zcull_ptr_o(), va); c->g->ops.fifo.enable_channel(c); clean_up: - vunmap(ctx_ptr); + gk20a_mem_end(g, mem); return ret; } @@ -1500,8 +1483,8 @@ static int 
gr_gk20a_init_golden_ctx_image(struct gk20a *g, u32 ctx_header_words; u32 i; u32 data; - void *ctx_ptr = NULL; - void *gold_ptr = NULL; + struct mem_desc *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; + struct mem_desc *gr_mem = &ch_ctx->gr_ctx->mem; u32 err = 0; gk20a_dbg_fn(""); @@ -1527,16 +1510,10 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, if (err) goto clean_up; - gold_ptr = vmap(gr->global_ctx_buffer[GOLDEN_CTX].mem.pages, - PAGE_ALIGN(gr->global_ctx_buffer[GOLDEN_CTX].mem.size) >> - PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); - if (!gold_ptr) + if (gk20a_mem_begin(g, gold_mem)) goto clean_up; - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + if (gk20a_mem_begin(g, gr_mem)) goto clean_up; ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); @@ -1545,14 +1522,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, g->ops.mm.l2_flush(g, true); for (i = 0; i < ctx_header_words; i++) { - data = gk20a_mem_rd32(ctx_ptr, i); - gk20a_mem_wr32(gold_ptr, i, data); + data = gk20a_mem_rd32(g, gr_mem, i); + gk20a_mem_wr32(g, gold_mem, i, data); } - gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_o(), 0, + gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); - gk20a_mem_wr32(gold_ptr + ctxsw_prog_main_image_zcull_ptr_o(), 0, 0); + gk20a_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_ptr_o(), 0); gr_gk20a_commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); @@ -1568,12 +1545,12 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, goto clean_up; } - for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) - gr->ctx_vars.local_golden_image[i] = - gk20a_mem_rd32(gold_ptr, i); + gk20a_mem_rd_n(g, gold_mem, 0, + gr->ctx_vars.local_golden_image, + gr->ctx_vars.golden_image_size); } - gr_gk20a_commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); + gr_gk20a_commit_inst(c, gr_mem->gpu_va); gr->ctx_vars.golden_image_initialized = true; @@ -1586,10 +1563,8 @@ clean_up: else gk20a_dbg_fn("done"); - if (gold_ptr) - vunmap(gold_ptr); - if (ctx_ptr) - vunmap(ctx_ptr); + gk20a_mem_end(g, gold_mem); + gk20a_mem_end(g, gr_mem); mutex_unlock(&gr->ctx_mutex); return err; @@ -1600,7 +1575,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, bool enable_smpc_ctxsw) { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - void *ctx_ptr = NULL; + struct mem_desc *mem; u32 data; int ret; @@ -1611,46 +1586,39 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, return -EFAULT; } + mem = &ch_ctx->gr_ctx->mem; + c->g->ops.fifo.disable_channel(c); ret = c->g->ops.fifo.preempt_channel(c->g, c->hw_chid); if (ret) { - c->g->ops.fifo.enable_channel(c); - gk20a_err(dev_from_gk20a(g), - "failed to preempt channel\n"); - return ret; + gk20a_err(dev_from_gk20a(g), "failed to preempt channel"); + goto out; } /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ g->ops.mm.l2_flush(g, true); - if (!ch_ctx->gr_ctx) { - gk20a_err(dev_from_gk20a(g), "no graphics context allocated"); - return -EFAULT; + if (gk20a_mem_begin(g, mem)) { + ret = -ENOMEM; + goto out; } - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) { - c->g->ops.fifo.enable_channel(c); - return -ENOMEM; - } - - data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + data = gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_pm_o()); data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); data |= enable_smpc_ctxsw ? ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, - data); + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_pm_o(), + data); - vunmap(ctx_ptr); + gk20a_mem_end(g, mem); - /* enable channel */ +out: c->g->ops.fifo.enable_channel(c); - - return 0; + return ret; } int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, @@ -1659,8 +1627,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; - void *ctx_ptr = NULL; - void *pm_ctx_ptr; + struct mem_desc *gr_mem; u32 data, virt_addr; int ret; @@ -1671,6 +1638,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, return -EFAULT; } + gr_mem = &ch_ctx->gr_ctx->mem; + if (enable_hwpm_ctxsw) { if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) return 0; @@ -1721,29 +1690,22 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } /* Now clear the buffer */ - pm_ctx_ptr = vmap(pm_ctx->mem.pages, - PAGE_ALIGN(pm_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - - if (!pm_ctx_ptr) { + if (gk20a_mem_begin(g, &pm_ctx->mem)) { ret = -ENOMEM; goto cleanup_pm_buf; } - memset(pm_ctx_ptr, 0, pm_ctx->mem.size); + gk20a_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size); - vunmap(pm_ctx_ptr); + gk20a_mem_end(g, &pm_ctx->mem); } - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) { + if (gk20a_mem_begin(g, gr_mem)) { ret = -ENOMEM; goto cleanup_pm_buf; } - data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + data = gk20a_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); data = data & ~ctxsw_prog_main_image_pm_mode_m(); if (enable_hwpm_ctxsw) { @@ -1760,10 +1722,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, data |= pm_ctx->pm_mode; - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); + gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); + gk20a_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); - vunmap(ctx_ptr); + gk20a_mem_end(g, gr_mem); /* enable channel */ c->g->ops.fifo.enable_channel(c); @@ -1788,9 +1750,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, u32 virt_addr_lo; u32 virt_addr_hi; u32 virt_addr = 0; - u32 i, v, data; + u32 v, data; int ret = 0; - void *ctx_ptr = NULL; + struct mem_desc *mem = &ch_ctx->gr_ctx->mem; gk20a_dbg_fn(""); @@ -1801,20 +1763,18 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, Flush and invalidate before cpu update. 
*/ g->ops.mm.l2_flush(g, true); - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + if (gk20a_mem_begin(g, mem)) return -ENOMEM; - for (i = 0; i < gr->ctx_vars.golden_image_size / 4; i++) - gk20a_mem_wr32(ctx_ptr, i, gr->ctx_vars.local_golden_image[i]); + gk20a_mem_wr_n(g, mem, 0, + gr->ctx_vars.local_golden_image, + gr->ctx_vars.golden_image_size); if (g->ops.gr.enable_cde_in_fecs && c->cde) - g->ops.gr.enable_cde_in_fecs(ctx_ptr); + g->ops.gr.enable_cde_in_fecs(g, mem); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_save_ops_o(), 0, 0); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_num_restore_ops_o(), 0, 0); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_save_ops_o(), 0); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_num_restore_ops_o(), 0); /* set priv access map */ virt_addr_lo = @@ -1827,29 +1787,29 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, else data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_config_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), data); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_lo_o(), virt_addr_lo); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_addr_hi_o(), virt_addr_hi); /* disable verif features */ - v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0); + v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_misc_options_o(), 0, v); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); if (g->ops.gr.update_ctxsw_preemption_mode) - g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, ctx_ptr); + g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_count_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), ch_ctx->patch_ctx.data_count); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_lo_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(), virt_addr_lo); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_patch_adr_hi_o(), 0, + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(), virt_addr_hi); /* Update main header region of the context buffer with the info needed @@ -1860,7 +1820,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, if (ch_ctx->pm_ctx.mem.gpu_va == 0) { gk20a_err(dev_from_gk20a(g), "context switched pm with no pm buffer!"); - vunmap(ctx_ptr); + gk20a_mem_end(g, mem); return -EFAULT; } @@ -1871,14 +1831,14 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, } else virt_addr = 0; - data = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + data = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); data = data & ~ctxsw_prog_main_image_pm_mode_m(); data |= ch_ctx->pm_ctx.pm_mode; - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, data); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_ptr_o(), 0, virt_addr); + gk20a_mem_wr(g, mem, 
ctxsw_prog_main_image_pm_o(), data); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_pm_ptr_o(), virt_addr); - vunmap(ctx_ptr); + gk20a_mem_end(g, mem); if (tegra_platform_is_linsim()) { u32 inst_base_ptr = @@ -1978,16 +1938,20 @@ static void gr_gk20a_init_ctxsw_ucode_segments( } static int gr_gk20a_copy_ctxsw_ucode_segments( - u8 *buf, + struct gk20a *g, + struct mem_desc *dst, struct gk20a_ctxsw_ucode_segments *segments, u32 *bootimage, u32 *code, u32 *data) { int i; - memcpy(buf + segments->boot.offset, bootimage, segments->boot.size); - memcpy(buf + segments->code.offset, code, segments->code.size); - memcpy(buf + segments->data.offset, data, segments->data.size); + gk20a_mem_wr_n(g, dst, segments->boot.offset, bootimage, + segments->boot.size); + gk20a_mem_wr_n(g, dst, segments->code.offset, code, + segments->code.size); + gk20a_mem_wr_n(g, dst, segments->data.offset, data, + segments->data.size); /* compute a "checksum" for the boot binary to detect its version */ segments->boot_signature = 0; @@ -2009,7 +1973,6 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) u32 *fecs_boot_image; u32 *gpccs_boot_image; struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; - u8 *buf; u32 ucode_size; int err = 0; @@ -2049,14 +2012,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) if (err) goto clean_up; - buf = (u8 *)ucode_info->surface_desc.cpu_va; - if (!buf) { - gk20a_err(d, "failed to map surface desc buffer"); - err = -ENOMEM; - goto clean_up; - } - - gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->fecs, + gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, + &ucode_info->fecs, fecs_boot_image, g->gr.ctx_vars.ucode.fecs.inst.l, g->gr.ctx_vars.ucode.fecs.data.l); @@ -2064,7 +2021,8 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) release_firmware(fecs_fw); fecs_fw = NULL; - gr_gk20a_copy_ctxsw_ucode_segments(buf, &ucode_info->gpccs, + gr_gk20a_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc, + &ucode_info->gpccs, gpccs_boot_image, g->gr.ctx_vars.ucode.gpccs.inst.l, g->gr.ctx_vars.ucode.gpccs.data.l); @@ -4690,41 +4648,38 @@ out: static int gr_gk20a_init_access_map(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; - void *data; - int err = 0; + struct mem_desc *mem = &gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem; u32 w, nr_pages = DIV_ROUND_UP(gr->ctx_vars.priv_access_map_size, PAGE_SIZE); u32 *whitelist = NULL; int num_entries = 0; - data = vmap(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.pages, - PAGE_ALIGN(gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size) >> - PAGE_SHIFT, 0, pgprot_writecombine(PAGE_KERNEL)); - if (!data) { + if (gk20a_mem_begin(g, mem)) { gk20a_err(dev_from_gk20a(g), "failed to map priv access map memory"); - err = -ENOMEM; - goto clean_up; + return -ENOMEM; } - memset(data, 0x0, PAGE_SIZE * nr_pages); + gk20a_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages); g->ops.gr.get_access_map(g, &whitelist, &num_entries); for (w = 0; w < num_entries; w++) { - u32 map_bit, map_byte, map_shift; + u32 map_bit, map_byte, map_shift, x; map_bit = whitelist[w] >> 2; map_byte = map_bit >> 3; map_shift = map_bit & 0x7; /* i.e. 
0-7 */ gk20a_dbg_info("access map addr:0x%x byte:0x%x bit:%d", whitelist[w], map_byte, map_shift); - ((u8 *)data)[map_byte] |= 1 << map_shift; + x = gk20a_mem_rd32(g, mem, map_byte / sizeof(u32)); + x |= 1 << ( + (map_byte % sizeof(u32) * BITS_PER_BYTE) + + map_shift); + gk20a_mem_wr32(g, mem, map_byte / sizeof(u32), x); } -clean_up: - if (data) - vunmap(data); + gk20a_mem_end(g, mem); return 0; } @@ -6659,7 +6614,7 @@ static void gr_gk20a_init_sm_dsm_reg_info(void) static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, u32 addr, u32 data, - u8 *context) + struct mem_desc *mem) { u32 num_gpc = g->gr.gpc_count; u32 num_tpc; @@ -6688,8 +6643,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, /* reset the patch count from previous runs,if ucode has already processed it */ - tmp = gk20a_mem_rd32(context + - ctxsw_prog_main_image_patch_count_o(), 0); + tmp = gk20a_mem_rd(g, mem, + ctxsw_prog_main_image_patch_count_o()); if (!tmp) ch_ctx->patch_ctx.data_count = 0; @@ -6700,15 +6655,15 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); - gk20a_mem_wr32(context + + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), - 0, ch_ctx->patch_ctx.data_count); - gk20a_mem_wr32(context + + ch_ctx->patch_ctx.data_count); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(), - 0, vaddr_lo); - gk20a_mem_wr32(context + + vaddr_lo); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(), - 0, vaddr_hi); + vaddr_hi); /* we're not caching these on cpu side, but later watch for it */ @@ -6760,17 +6715,15 @@ static void gr_gk20a_access_smpc_reg(struct gk20a *g, u32 quad, u32 offset) #define ILLEGAL_ID (~0) -static inline bool check_main_image_header_magic(void *context) +static inline bool check_main_image_header_magic(u8 *context) { - u32 magic = gk20a_mem_rd32(context + - ctxsw_prog_main_image_magic_value_o(), 0); + u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o()); gk20a_dbg(gpu_dbg_gpu_dbg, "main image magic=0x%x", magic); return magic == ctxsw_prog_main_image_magic_value_v_value_v(); } -static inline bool check_local_header_magic(void *context) +static inline bool check_local_header_magic(u8 *context) { - u32 magic = gk20a_mem_rd32(context + - ctxsw_prog_local_magic_value_o(), 0); + u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o()); gk20a_dbg(gpu_dbg_gpu_dbg, "local magic=0x%x", magic); return magic == ctxsw_prog_local_magic_value_v_value_v(); @@ -6814,7 +6767,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, u32 num_gpcs, num_tpcs; u32 chk_addr; u32 ext_priv_offset, ext_priv_size; - void *context; + u8 *context; u32 offset_to_segment, offset_to_segment_end; u32 sm_dsm_perf_reg_id = ILLEGAL_ID; u32 sm_dsm_perf_ctrl_reg_id = ILLEGAL_ID; @@ -6856,14 +6809,14 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, /* note below is in words/num_registers */ marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; - context = context_buffer; + context = (u8 *)context_buffer; /* sanity check main header */ if (!check_main_image_header_magic(context)) { gk20a_err(dev_from_gk20a(g), "Invalid main header: magic value"); return -EINVAL; } - num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); + num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); if (gpc_num >= num_gpcs) { gk20a_err(dev_from_gk20a(g), "GPC 0x%08x is greater than total count 
0x%08x!\n", @@ -6871,7 +6824,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, return -EINVAL; } - data32 = gk20a_mem_rd32(context + ctxsw_prog_main_extended_buffer_ctl_o(), 0); + data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o()); ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); if (0 == ext_priv_size) { gk20a_dbg_info(" No extended memory in context buffer"); @@ -7149,7 +7102,7 @@ gr_gk20a_process_context_buffer_priv_segment(struct gk20a *g, } static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, - void *context, + u8 *context, u32 *num_ppcs, u32 *ppc_mask, u32 *reg_ppc_count) { @@ -7165,7 +7118,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, (num_pes_per_gpc > 1))) return -EINVAL; - data32 = gk20a_mem_rd32(context + ctxsw_prog_local_image_ppc_info_o(), 0); + data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); @@ -7177,7 +7130,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, /* * This function will return the 32 bit offset for a priv register if it is - * present in the context buffer. + * present in the context buffer. The context buffer is in CPU memory. */ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, @@ -7196,7 +7149,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, u32 offset; u32 sys_priv_offset, gpc_priv_offset; u32 ppc_mask, reg_list_ppc_count; - void *context; + u8 *context; u32 offset_to_segment; gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); @@ -7207,13 +7160,13 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, if (err) return err; - context = context_buffer; + context = (u8 *)context_buffer; if (!check_main_image_header_magic(context)) { gk20a_err(dev_from_gk20a(g), "Invalid main header: magic value"); return -EINVAL; } - num_gpcs = gk20a_mem_rd32(context + ctxsw_prog_main_image_num_gpcs_o(), 0); + num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); /* Parse the FECS local header. */ context += ctxsw_prog_ucode_header_size_in_bytes(); @@ -7222,7 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, "Invalid FECS local header: magic value\n"); return -EINVAL; } - data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); + data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); /* If found in Ext buffer, ok. 
@@ -7268,7 +7221,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, return -EINVAL; } - data32 = gk20a_mem_rd32(context + ctxsw_prog_local_priv_register_ctl_o(), 0); + data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); err = gr_gk20a_determine_ppc_configuration(g, context, @@ -7277,7 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, if (err) return err; - num_tpcs = gk20a_mem_rd32(context + ctxsw_prog_local_image_num_tpcs_o(), 0); + num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); if ((i == gpc_num) && ((tpc_num + 1) > num_tpcs)) { gk20a_err(dev_from_gk20a(g), @@ -7689,9 +7642,9 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, { struct gk20a *g = ch->g; struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; - void *ctx_ptr = NULL; - void *pm_ctx_ptr = NULL; - void *base_ptr = NULL; + bool gr_ctx_ready = false; + bool pm_ctx_ready = false; + struct mem_desc *current_mem = NULL; bool ch_is_curr_ctx, restart_gr_ctxsw = false; u32 i, j, offset, v; struct gr_gk20a *gr = &g->gr; @@ -7821,20 +7774,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD), ctx_ops[i].quad); if (!err) { - if (!ctx_ptr) { + if (!gr_ctx_ready) { /* would have been a variant of * gr_gk20a_apply_instmem_overrides, * recoded in-place instead. */ - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) { + if (gk20a_mem_begin(g, &ch_ctx->gr_ctx->mem)) { err = -ENOMEM; goto cleanup; } + gr_ctx_ready = true; } - base_ptr = ctx_ptr; + current_mem = &ch_ctx->gr_ctx->mem; } else { err = gr_gk20a_get_pm_ctx_buffer_offsets(g, ctx_ops[i].offset, @@ -7849,7 +7800,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, NVGPU_DBG_GPU_REG_OP_STATUS_INVALID_OFFSET; continue; } - if (!pm_ctx_ptr) { + if (!pm_ctx_ready) { /* Make sure ctx buffer was initialized */ if (!ch_ctx->pm_ctx.mem.pages) { gk20a_err(dev_from_gk20a(g), @@ -7857,15 +7808,13 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, err = -EINVAL; goto cleanup; } - pm_ctx_ptr = vmap(ch_ctx->pm_ctx.mem.pages, - PAGE_ALIGN(ch_ctx->pm_ctx.mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!pm_ctx_ptr) { + if (gk20a_mem_begin(g, &ch_ctx->pm_ctx.mem)) { err = -ENOMEM; goto cleanup; } + pm_ctx_ready = true; } - base_ptr = pm_ctx_ptr; + current_mem = &ch_ctx->pm_ctx.mem; } /* if this is a quad access, setup for special access*/ @@ -7878,24 +7827,24 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* sanity check gr ctxt offsets, * don't write outside, worst case */ - if ((base_ptr == ctx_ptr) && + if ((current_mem == &ch_ctx->gr_ctx->mem) && (offsets[j] >= g->gr.ctx_vars.golden_image_size)) continue; if (pass == 0) { /* write pass */ - v = gk20a_mem_rd32(base_ptr + offsets[j], 0); + v = gk20a_mem_rd(g, current_mem, offsets[j]); v &= ~ctx_ops[i].and_n_mask_lo; v |= ctx_ops[i].value_lo; - gk20a_mem_wr32(base_ptr + offsets[j], 0, v); + gk20a_mem_wr(g, current_mem, offsets[j], v); gk20a_dbg(gpu_dbg_gpu_dbg, "context wr: offset=0x%x v=0x%x", offsets[j], v); if (ctx_ops[i].op == REGOP(WRITE_64)) { - v = gk20a_mem_rd32(base_ptr + offsets[j] + 4, 0); + v = gk20a_mem_rd(g, current_mem, offsets[j] + 4); v &= ~ctx_ops[i].and_n_mask_hi; v |= ctx_ops[i].value_hi; - gk20a_mem_wr32(base_ptr + offsets[j] + 4, 0, v); + gk20a_mem_wr(g, current_mem, offsets[j] + 4, v); 
gk20a_dbg(gpu_dbg_gpu_dbg, "context wr: offset=0x%x v=0x%x", @@ -7905,18 +7854,18 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, /* check to see if we need to add a special WAR for some of the SMPC perf regs */ gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], - v, base_ptr); + v, current_mem); } else { /* read pass */ ctx_ops[i].value_lo = - gk20a_mem_rd32(base_ptr + offsets[0], 0); + gk20a_mem_rd(g, current_mem, offsets[0]); gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", offsets[0], ctx_ops[i].value_lo); if (ctx_ops[i].op == REGOP(READ_64)) { ctx_ops[i].value_hi = - gk20a_mem_rd32(base_ptr + offsets[0] + 4, 0); + gk20a_mem_rd(g, current_mem, offsets[0] + 4); gk20a_dbg(gpu_dbg_gpu_dbg, "context rd: offset=0x%x v=0x%x", @@ -7943,12 +7892,10 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, if (ch_ctx->patch_ctx.mem.cpu_va) gr_gk20a_ctx_patch_write_end(g, ch_ctx); - - if (ctx_ptr) - vunmap(ctx_ptr); - - if (pm_ctx_ptr) - vunmap(pm_ctx_ptr); + if (gr_ctx_ready) + gk20a_mem_end(g, &ch_ctx->gr_ctx->mem); + if (pm_ctx_ready) + gk20a_mem_end(g, &ch_ctx->pm_ctx.mem); if (restart_gr_ctxsw) { int tmp_err = gr_gk20a_enable_ctxsw(g); diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 6f6734b4d..13382416d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -44,6 +44,112 @@ #include "kind_gk20a.h" #include "semaphore_gk20a.h" +int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) +{ + void *cpu_va; + + if (WARN_ON(mem->cpu_va)) { + gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); + return -EBUSY; + } + + cpu_va = vmap(mem->pages, + PAGE_ALIGN(mem->size) >> PAGE_SHIFT, + 0, pgprot_writecombine(PAGE_KERNEL)); + + if (WARN_ON(!cpu_va)) + return -ENOMEM; + + mem->cpu_va = cpu_va; + return 0; +} + +void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) +{ + vunmap(mem->cpu_va); + mem->cpu_va = NULL; +} + +u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) +{ + u32 *ptr = mem->cpu_va; + u32 data; + + WARN_ON(!ptr); + data = ptr[w]; +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + return data; +} + +u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset) +{ + WARN_ON(offset & 3); + return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); +} + +void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, + u32 offset, void *dest, u32 size) +{ + u32 i; + u32 *dest_u32 = dest; + + WARN_ON(offset & 3); + WARN_ON(size & 3); + offset /= sizeof(u32); + size /= sizeof(u32); + + for (i = 0; i < size; i++) + dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i); +} + +void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) +{ + u32 *ptr = mem->cpu_va; + + WARN_ON(!ptr); +#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM + gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); +#endif + ptr[w] = data; +} + +void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) +{ + WARN_ON(offset & 3); + gk20a_mem_wr32(g, mem, offset / sizeof(u32), data); +} + +void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, + void *src, u32 size) +{ + u32 i; + u32 *src_u32 = src; + + WARN_ON(offset & 3); + WARN_ON(size & 3); + offset /= sizeof(u32); + size /= sizeof(u32); + + for (i = 0; i < size; i++) + gk20a_mem_wr32(g, mem, offset + i, src_u32[i]); +} + +void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, + u32 value, u32 size) +{ + u32 i; + + WARN_ON(offset & 3); + WARN_ON(size & 3); + 
offset /= sizeof(u32); + size /= sizeof(u32); + + for (i = 0; i < size; i++) + gk20a_mem_wr32(g, mem, offset + i, value); +} + /* * GPU mapping life cycle * ====================== @@ -780,9 +886,14 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, *pde_lo, *pde_hi); } -u32 *pde_from_index(struct vm_gk20a *vm, u32 i) +static u32 pde_from_index(u32 i) { - return (u32 *) (((u8 *)vm->pdb.mem.cpu_va) + i*gmmu_pde__size_v()); + return i * gmmu_pde__size_v() / sizeof(u32); +} + +static u32 pte_from_index(u32 i) +{ + return i * gmmu_pte__size_v() / sizeof(u32); } u32 pte_index_from_vaddr(struct vm_gk20a *vm, @@ -2323,7 +2434,7 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, u64 pte_addr_small = 0, pte_addr_big = 0; struct gk20a_mm_entry *entry = vm->pdb.entries + i; u32 pde_v[2] = {0, 0}; - u32 *pde; + u32 pde; gk20a_dbg_fn(""); @@ -2348,10 +2459,10 @@ static int update_gmmu_pde_locked(struct vm_gk20a *vm, (big_valid ? (gmmu_pde_vol_big_true_f()) : gmmu_pde_vol_big_false_f()); - pde = pde_from_index(vm, i); + pde = pde_from_index(i); - gk20a_mem_wr32(pde, 0, pde_v[0]); - gk20a_mem_wr32(pde, 1, pde_v[1]); + gk20a_mem_wr32(g, &vm->pdb.mem, pde + 0, pde_v[0]); + gk20a_mem_wr32(g, &vm->pdb.mem, pde + 1, pde_v[1]); gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x", i, gmmu_pgsz_idx, pde_v[1], pde_v[0]); @@ -2432,8 +2543,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm, gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i); } - gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]); - gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]); + gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 0, pte_w[0]); + gk20a_mem_wr32(g, &pte->mem, pte_from_index(i) + 1, pte_w[1]); if (*iova) { *iova += page_size; @@ -3489,19 +3600,19 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) false, false, "cde"); } -void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr) +void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr) { u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); u32 pdb_addr_hi = u64_hi32(pdb_addr); - gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), + gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(), (g->mm.vidmem_is_vidmem ? 
ram_in_page_dir_base_target_sys_mem_ncoh_f() : ram_in_page_dir_base_target_vid_mem_f()) | ram_in_page_dir_base_vol_true_f() | ram_in_page_dir_base_lo_f(pdb_addr_lo)); - gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(), + gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(), ram_in_page_dir_base_hi_f(pdb_addr_hi)); } @@ -3510,23 +3621,22 @@ void gk20a_init_inst_block(struct mem_desc *inst_block, struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); u64 pde_addr = g->ops.mm.get_iova_addr(g, vm->pdb.mem.sgt->sgl, 0); - void *inst_ptr = inst_block->cpu_va; gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p", - gk20a_mm_inst_block_addr(g, inst_block), inst_ptr); + gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va); gk20a_dbg_info("pde pa=0x%llx", (u64)pde_addr); - g->ops.mm.init_pdb(g, inst_ptr, pde_addr); + g->ops.mm.init_pdb(g, inst_block, pde_addr); - gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(), + gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_lo_w(), u64_lo32(vm->va_limit - 1) & ~0xfff); - gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(), + gk20a_mem_wr32(g, inst_block, ram_in_adr_limit_hi_w(), ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit - 1))); if (big_page_size && g->ops.mm.set_big_page_size) - g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size); + g->ops.mm.set_big_page_size(g, inst_block, big_page_size); } int gk20a_mm_fb_flush(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7fa0b7fbc..e9ac8f184 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -419,6 +419,34 @@ static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, return gmmu_page_size_small; } +/* + * Buffer accessors - wrap between begin() and end() if there is no permanent + * kernel mapping for this buffer. 
+ */ + +int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem); +/* nop for null mem, like with free() or vunmap() */ +void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem); + +/* word-indexed offset */ +u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w); +/* byte offset (32b-aligned) */ +u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset); +/* memcpy to cpu, offset and size in bytes (32b-aligned) */ +void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, u32 offset, + void *dest, u32 size); + +/* word-indexed offset */ +void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data); +/* byte offset (32b-aligned) */ +void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data); +/* memcpy from cpu, offset and size in bytes (32b-aligned) */ +void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, + void *src, u32 size); +/* size and offset in bytes (32b-aligned), filled with u32s */ +void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, + u32 value, u32 size); + #if 0 /*related to addr bits above, concern below TBD on which is accurate */ #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ bus_bar1_block_ptr_s()) @@ -673,7 +701,6 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, u64 addr_lo, u64 addr_hi, u32 *pde_lo, u32 *pde_hi); int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm); -u32 *pde_from_index(struct vm_gk20a *vm, u32 i); u32 pte_index_from_vaddr(struct vm_gk20a *vm, u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); void free_gmmu_pages(struct vm_gk20a *vm, @@ -685,7 +712,7 @@ struct gpu_ops; void gk20a_init_mm(struct gpu_ops *gops); const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size); -void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr); +void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem, u64 pdb_addr); void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 56ad0c2a1..54b2eef4b 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -2421,11 +2421,10 @@ static int gk20a_init_pmu_reset_enable_hw(struct gk20a *g) static int gk20a_prepare_ucode(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; - int i, err = 0; + int err = 0; struct device *d = dev_from_gk20a(g); struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; - void *ucode_ptr; if (g->pmu_fw) { gk20a_init_pmu(pmu); @@ -2449,11 +2448,8 @@ static int gk20a_prepare_ucode(struct gk20a *g) if (err) goto err_release_fw; - ucode_ptr = pmu->ucode.cpu_va; - - for (i = 0; i < (pmu->desc->app_start_offset + - pmu->desc->app_size) >> 2; i++) - gk20a_mem_wr32(ucode_ptr, i, pmu->ucode_image[i]); + gk20a_mem_wr_n(g, &pmu->ucode, 0, pmu->ucode_image, + pmu->desc->app_start_offset + pmu->desc->app_size); gk20a_init_pmu(pmu); diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 0e6e715d2..3ac2cec88 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -43,8 +43,8 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, static void lsfm_free_ucode_img_res(struct flcn_ucode_img *p_img); static void lsfm_free_nonpmu_ucode_img_res(struct flcn_ucode_img *p_img); static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm); -static int lsfm_init_wpr_contents(struct gk20a *g, struct 
ls_flcn_mgr *plsfm, - void *nonwpr_addr); +static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct mem_desc *nonwpr); static int acr_ucode_patch_sig(struct gk20a *g, unsigned int *p_img, unsigned int *p_prod_sig, @@ -355,7 +355,7 @@ int prepare_ucode_blob(struct gk20a *g) gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n", plsfm->managed_flcn_cnt, plsfm->wpr_size); - lsfm_init_wpr_contents(g, plsfm, g->acr.ucode_blob.cpu_va); + lsfm_init_wpr_contents(g, plsfm, &g->acr.ucode_blob); } else { gm20b_dbg_pmu("LSFM is managing no falcons.\n"); } @@ -613,120 +613,91 @@ static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, } /* Initialize WPR contents */ -static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, - void *nonwpr_addr) +static void lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct mem_desc *ucode) { + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + u32 i; - int status = 0; - union flcn_bl_generic_desc *nonwpr_bl_gen_desc; - if (nonwpr_addr == NULL) { - status = -ENOMEM; - } else { - struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; - struct lsf_wpr_header *wpr_hdr; - struct lsf_lsb_header *lsb_hdr; - void *ucode_off; - u32 i; + /* The WPR array is at the base of the WPR */ + pnode = plsfm->ucode_img_list; + i = 0; - /* The WPR array is at the base of the WPR */ - wpr_hdr = (struct lsf_wpr_header *)nonwpr_addr; - pnode = plsfm->ucode_img_list; - i = 0; + /* + * Walk the managed falcons, flush WPR and LSB headers to FB. + * flush any bl args to the storage area relative to the + * ucode image (appended on the end as a DMEM area). + */ + while (pnode) { + /* Flush WPR header to memory*/ + gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header), + &pnode->wpr_header, sizeof(pnode->wpr_header)); - /* - * Walk the managed falcons, flush WPR and LSB headers to FB. - * flush any bl args to the storage area relative to the - * ucode image (appended on the end as a DMEM area). 
- */ - while (pnode) { - /* Flush WPR header to memory*/ - memcpy(&wpr_hdr[i], &pnode->wpr_header, - sizeof(struct lsf_wpr_header)); - gm20b_dbg_pmu("wpr header as in memory and pnode\n"); - gm20b_dbg_pmu("falconid :%d %d\n", - pnode->wpr_header.falcon_id, - wpr_hdr[i].falcon_id); - gm20b_dbg_pmu("lsb_offset :%x %x\n", - pnode->wpr_header.lsb_offset, - wpr_hdr[i].lsb_offset); - gm20b_dbg_pmu("bootstrap_owner :%d %d\n", - pnode->wpr_header.bootstrap_owner, - wpr_hdr[i].bootstrap_owner); - gm20b_dbg_pmu("lazy_bootstrap :%d %d\n", - pnode->wpr_header.lazy_bootstrap, - wpr_hdr[i].lazy_bootstrap); - gm20b_dbg_pmu("status :%d %d\n", - pnode->wpr_header.status, wpr_hdr[i].status); + gm20b_dbg_pmu("wpr header"); + gm20b_dbg_pmu("falconid :%d", + pnode->wpr_header.falcon_id); + gm20b_dbg_pmu("lsb_offset :%x", + pnode->wpr_header.lsb_offset); + gm20b_dbg_pmu("bootstrap_owner :%d", + pnode->wpr_header.bootstrap_owner); + gm20b_dbg_pmu("lazy_bootstrap :%d", + pnode->wpr_header.lazy_bootstrap); + gm20b_dbg_pmu("status :%d", + pnode->wpr_header.status); - /*Flush LSB header to memory*/ - lsb_hdr = (struct lsf_lsb_header *)((u8 *)nonwpr_addr + - pnode->wpr_header.lsb_offset); - memcpy(lsb_hdr, &pnode->lsb_header, - sizeof(struct lsf_lsb_header)); - gm20b_dbg_pmu("lsb header as in memory and pnode\n"); - gm20b_dbg_pmu("ucode_off :%x %x\n", - pnode->lsb_header.ucode_off, - lsb_hdr->ucode_off); - gm20b_dbg_pmu("ucode_size :%x %x\n", - pnode->lsb_header.ucode_size, - lsb_hdr->ucode_size); - gm20b_dbg_pmu("data_size :%x %x\n", - pnode->lsb_header.data_size, - lsb_hdr->data_size); - gm20b_dbg_pmu("bl_code_size :%x %x\n", - pnode->lsb_header.bl_code_size, - lsb_hdr->bl_code_size); - gm20b_dbg_pmu("bl_imem_off :%x %x\n", - pnode->lsb_header.bl_imem_off, - lsb_hdr->bl_imem_off); - gm20b_dbg_pmu("bl_data_off :%x %x\n", - pnode->lsb_header.bl_data_off, - lsb_hdr->bl_data_off); - gm20b_dbg_pmu("bl_data_size :%x %x\n", - pnode->lsb_header.bl_data_size, - lsb_hdr->bl_data_size); - gm20b_dbg_pmu("app_code_off :%x %x\n", - pnode->lsb_header.app_code_off, - lsb_hdr->app_code_off); - gm20b_dbg_pmu("app_code_size :%x %x\n", - pnode->lsb_header.app_code_size, - lsb_hdr->app_code_size); - gm20b_dbg_pmu("app_data_off :%x %x\n", - pnode->lsb_header.app_data_off, - lsb_hdr->app_data_off); - gm20b_dbg_pmu("app_data_size :%x %x\n", - pnode->lsb_header.app_data_size, - lsb_hdr->app_data_size); - gm20b_dbg_pmu("flags :%x %x\n", - pnode->lsb_header.flags, lsb_hdr->flags); + /*Flush LSB header to memory*/ + gk20a_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, + &pnode->lsb_header, sizeof(pnode->lsb_header)); - /*If this falcon has a boot loader and related args, - * flush them.*/ - if (!pnode->ucode_img.header) { - nonwpr_bl_gen_desc = - (union flcn_bl_generic_desc *) - ((u8 *)nonwpr_addr + - pnode->lsb_header.bl_data_off); + gm20b_dbg_pmu("lsb header"); + gm20b_dbg_pmu("ucode_off :%x", + pnode->lsb_header.ucode_off); + gm20b_dbg_pmu("ucode_size :%x", + pnode->lsb_header.ucode_size); + gm20b_dbg_pmu("data_size :%x", + pnode->lsb_header.data_size); + gm20b_dbg_pmu("bl_code_size :%x", + pnode->lsb_header.bl_code_size); + gm20b_dbg_pmu("bl_imem_off :%x", + pnode->lsb_header.bl_imem_off); + gm20b_dbg_pmu("bl_data_off :%x", + pnode->lsb_header.bl_data_off); + gm20b_dbg_pmu("bl_data_size :%x", + pnode->lsb_header.bl_data_size); + gm20b_dbg_pmu("app_code_off :%x", + pnode->lsb_header.app_code_off); + gm20b_dbg_pmu("app_code_size :%x", + pnode->lsb_header.app_code_size); + gm20b_dbg_pmu("app_data_off :%x", + 
pnode->lsb_header.app_data_off); + gm20b_dbg_pmu("app_data_size :%x", + pnode->lsb_header.app_data_size); + gm20b_dbg_pmu("flags :%x", + pnode->lsb_header.flags); - /*Populate gen bl and flush to memory*/ - lsfm_fill_flcn_bl_gen_desc(g, pnode); - memcpy(nonwpr_bl_gen_desc, &pnode->bl_gen_desc, + /*If this falcon has a boot loader and related args, + * flush them.*/ + if (!pnode->ucode_img.header) { + /*Populate gen bl and flush to memory*/ + lsfm_fill_flcn_bl_gen_desc(g, pnode); + gk20a_mem_wr_n(g, ucode, + pnode->lsb_header.bl_data_off, + &pnode->bl_gen_desc, pnode->bl_gen_desc_size); - } - ucode_off = (void *)(pnode->lsb_header.ucode_off + - (u8 *)nonwpr_addr); - /*Copying of ucode*/ - memcpy(ucode_off, pnode->ucode_img.data, - pnode->ucode_img.data_size); - pnode = pnode->next; - i++; } - - /* Tag the terminator WPR header with an invalid falcon ID. */ - gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id, - 0, LSF_FALCON_ID_INVALID); + /*Copying of ucode*/ + gk20a_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, + pnode->ucode_img.data, + pnode->ucode_img.data_size); + pnode = pnode->next; + i++; } - return status; + + /* Tag the terminator WPR header with an invalid falcon ID. */ + gk20a_mem_wr32(g, ucode, + plsfm->managed_flcn_cnt * sizeof(struct lsf_wpr_header) + + offsetof(struct lsf_wpr_header, falcon_id), + LSF_FALCON_ID_INVALID); } /*! @@ -1000,7 +971,7 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; - int i, err = 0; + int err = 0; u64 *acr_dmem; u32 img_size_in_bytes = 0; u32 status, size; @@ -1066,10 +1037,8 @@ int gm20b_bootstrap_hs_flcn(struct gk20a *g) ((struct flcn_acr_desc *)acr_dmem)->regions.no_regions = 2; ((struct flcn_acr_desc *)acr_dmem)->wpr_offset = 0; - for (i = 0; i < (img_size_in_bytes/4); i++) { - gk20a_mem_wr32(acr->acr_ucode.cpu_va, i, - acr_ucode_data_t210_load[i]); - } + gk20a_mem_wr_n(g, &acr->acr_ucode, 0, + acr_ucode_data_t210_load, img_size_in_bytes); /* * In order to execute this binary, we will be using * a bootloader which will load this image into PMU IMEM/DMEM. 
@@ -1323,7 +1292,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) struct mm_gk20a *mm = &g->mm; struct vm_gk20a *vm = &mm->pmu.vm; struct device *d = dev_from_gk20a(g); - int i, err = 0; + int err = 0; u32 bl_sz; struct acr_gm20b *acr = &g->acr; const struct firmware *hsbl_fw = acr->hsbl_fw; @@ -1369,8 +1338,7 @@ int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt) goto err_free_ucode; } - for (i = 0; i < (bl_sz) >> 2; i++) - gk20a_mem_wr32(acr->hsbl_ucode.cpu_va, i, pmu_bl_gm10x[i]); + gk20a_mem_wr_n(g, &acr->hsbl_ucode, 0, pmu_bl_gm10x, bl_sz); gm20b_dbg_pmu("Copied bl ucode to bl_cpuva\n"); } /* diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index b9a1e685f..2197bae56 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -849,7 +849,7 @@ static int gr_gm20b_alloc_gr_ctx(struct gk20a *g, static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, - void *ctx_ptr) + struct mem_desc *mem) { struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; u32 cta_preempt_option = @@ -859,7 +859,8 @@ static void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) { gk20a_dbg_info("CTA: %x", cta_preempt_option); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_preemption_options_o(), 0, + gk20a_mem_wr(g, mem, + ctxsw_prog_main_image_preemption_options_o(), cta_preempt_option); } @@ -1005,7 +1006,7 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, bool enable) { struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; - void *ctx_ptr = NULL; + struct mem_desc *mem; u32 v; gk20a_dbg_fn(""); @@ -1013,18 +1014,17 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) return -EINVAL; - ctx_ptr = vmap(ch_ctx->gr_ctx->mem.pages, - PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT, - 0, pgprot_writecombine(PAGE_KERNEL)); - if (!ctx_ptr) + mem = &ch_ctx->gr_ctx->mem; + + if (gk20a_mem_begin(c->g, mem)) return -ENOMEM; - v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0); + v = gk20a_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o()); v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_pm_o(), 0, v); + gk20a_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v); - vunmap(ctx_ptr); + gk20a_mem_end(c->g, mem); gk20a_dbg_fn("done"); @@ -1089,13 +1089,13 @@ static void gr_gm20b_init_cyclestats(struct gk20a *g) #endif } -static void gr_gm20b_enable_cde_in_fecs(void *ctx_ptr) +static void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct mem_desc *mem) { u32 cde_v; - cde_v = gk20a_mem_rd32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0); + cde_v = gk20a_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o()); cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); - gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_ctl_o(), 0, cde_v); + gk20a_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); } static void gr_gm20b_bpt_reg_info(struct gk20a *g, struct warpstate *w_state) diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index ac73b5c8f..726d73ed8 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -106,14 +106,14 @@ static void gm20b_mm_mmu_set_debug_mode(struct gk20a *g, bool enable) } static void gm20b_mm_set_big_page_size(struct gk20a *g, - void *inst_ptr, int size) + struct 
mem_desc *mem, int size) { u32 val; gk20a_dbg_fn(""); gk20a_dbg_info("big page size %d\n", size); - val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w()); + val = gk20a_mem_rd32(g, mem, ram_in_big_page_size_w()); val &= ~ram_in_big_page_size_m(); if (size == SZ_64K) @@ -121,7 +121,7 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g, else val |= ram_in_big_page_size_128kb_f(); - gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val); + gk20a_mem_wr32(g, mem, ram_in_big_page_size_w(), val); gk20a_dbg_fn("done"); } diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 66b5e4103..d1cba9792 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -285,8 +285,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) mutex_init(&f->free_chs_mutex); for (chid = 0; chid < f->num_channels; chid++) { - f->channel[chid].userd_cpu_va = - f->userd.cpu_va + chid * f->userd_entry_size; f->channel[chid].userd_iova = g->ops.mm.get_iova_addr(g, f->userd.sgt->sgl, 0) + chid * f->userd_entry_size;
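
Note on the access pattern this series converts to (illustration only, not part of the patch): callers that previously vmap()'d a buffer or dereferenced mem_desc::cpu_va directly now bracket their accesses with gk20a_mem_begin()/gk20a_mem_end() and go through gk20a_mem_rd()/gk20a_mem_wr(), as in the gr_gm20b_update_pc_sampling() hunk above. The sketch below shows only that shape; the helper name, the stand-in declarations, and the return/offset types are assumptions inferred from the call sites in this diff, not definitions taken from the driver headers.

#include <linux/errno.h>
#include <linux/types.h>

/* Opaque types and accessor prototypes as inferred from the call sites
 * above; the real declarations live in the nvgpu headers. */
struct gk20a;
struct mem_desc;

int  gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem);
void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem);
u32  gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset);
void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);

/* Hypothetical helper: read-modify-write one 32-bit word of a buffer
 * described by a mem_desc, without assuming a permanent CPU mapping.
 * "offset" follows the _o()-style offsets used at the call sites above. */
static int example_rmw_word(struct gk20a *g, struct mem_desc *mem,
                            u32 offset, u32 set_bits)
{
        u32 v;

        /* Map (or otherwise make accessible) the backing storage. */
        if (gk20a_mem_begin(g, mem))
                return -ENOMEM;

        v = gk20a_mem_rd(g, mem, offset);
        v |= set_bits;
        gk20a_mem_wr(g, mem, offset, v);

        /* Drop the temporary mapping. */
        gk20a_mem_end(g, mem);

        return 0;
}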
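
For the bulk flushes (WPR/LSB headers, boot loader descriptors, ucode images, and the ACR/HS boot loader blobs), the series uses gk20a_mem_wr_n(g, mem, dst_offset, src, size) in place of memcpy() against a raw cpu_va or a per-word gk20a_mem_wr32() loop. The fragment below restates the header-flush loop of lsfm_init_wpr_contents() in isolation, under the same caveats as above: the prototype is inferred from its call sites (byte offset and byte count, as the img_size_in_bytes call suggests), and the stand-in structures are hypothetical.

/* Prototype as inferred from the call sites above. */
void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
                    void *src, u32 size);

/* Hypothetical stand-ins for the per-falcon bookkeeping used in
 * lsfm_init_wpr_contents(); field names follow the hunk above. */
struct example_wpr_header {
        u32 falcon_id;
        u32 lsb_offset;
};

struct example_node {
        struct example_wpr_header wpr_header;
        struct example_node *next;
};

/* Flush each managed falcon's WPR header into the blob at array index i,
 * mirroring the converted loop above. */
static void example_flush_wpr_headers(struct gk20a *g, struct mem_desc *ucode,
                                      struct example_node *list)
{
        struct example_node *pnode = list;
        u32 i = 0;

        while (pnode) {
                gk20a_mem_wr_n(g, ucode, i * sizeof(pnode->wpr_header),
                               &pnode->wpr_header, sizeof(pnode->wpr_header));
                pnode = pnode->next;
                i++;
        }
}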