diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 27d1f592f..f131803fa 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -150,6 +150,8 @@ nvgpu-y += \ common/nvlink/nvlink.o \ common/nvlink/nvlink_gv100.o \ common/nvlink/nvlink_tu104.o \ + hal/mm/cache/flush_gk20a.o \ + hal/mm/cache/flush_gv11b.o \ hal/mc/mc_gm20b.o \ hal/mc/mc_gp10b.o \ hal/mc/mc_gv11b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index c368ca439..886222958 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -271,6 +271,8 @@ srcs += common/sim.c \ tu104/mm_tu104.c \ tu104/hal_tu104.c \ tu104/func_tu104.c \ + hal/mm/cache/flush_gk20a.c \ + hal/mm/cache/flush_gv11b.c \ hal/mc/mc_gm20b.c \ hal/mc/mc_gp10b.c \ hal/mc/mc_gv11b.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index b564c7e49..385f4824b 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -2594,7 +2594,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) * Ensure that all pending writes are actually done before trying to * read semaphore values from DRAM. */ - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *c = g->fifo.channel+chid; diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index d24599159..2ba41525d 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -689,7 +689,7 @@ u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) if (!gr_ctx->ctx_id_valid) { /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } @@ -707,7 +707,7 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { int err; - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -753,7 +753,7 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -828,7 +828,7 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index 51f1cdae7..9d2adc143 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -479,7 +479,7 @@ int nvgpu_gr_fecs_trace_poll(struct gk20a *g) read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt); /* Ensure all FECS writes have made it to SYSMEM */ - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); while (read != write) { cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask); diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c index 85b98aa33..2cffe2612 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -284,7 +284,7 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g, { /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index 9a3b33104..d425dab50 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -88,7 +88,7 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, struct nvgpu_mem *ctxheader = &subctx->ctx_header; int err = 0; - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); } @@ -154,4 +154,3 @@ struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct gk20a *g, { return &subctx->ctx_header; } - diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index 413a7d549..cef21117a 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -882,7 +882,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, } if (batch == NULL) { - if (gk20a_mm_l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "gk20a_mm_l2_flush[1] failed"); } err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem); @@ -891,7 +891,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, } } else { if (!batch->gpu_l2_flushed) { - if (gk20a_mm_l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "gk20a_mm_l2_flush[2] failed"); } batch->gpu_l2_flushed = true; diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index afdb2c8a3..47990a03f 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -124,8 +124,8 @@ int nvgpu_mm_suspend(struct gk20a *g) nvgpu_vidmem_thread_pause_sync(&g->mm); - g->ops.mm.cbc_clean(g); - err = g->ops.mm.l2_flush(g, false); + g->ops.mm.cache.cbc_clean(g); + err = g->ops.mm.cache.l2_flush(g, false); if (err != 0) { nvgpu_err(g, "l2_flush 
failed"); return err; diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 4717b0e39..023ee9819 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -571,10 +571,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .gmmu_map = vgpu_locked_gmmu_map, .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .fb_flush = vgpu_mm_fb_flush, - .l2_invalidate = vgpu_mm_l2_invalidate, - .l2_flush = vgpu_mm_l2_flush, - .cbc_clean = NULL, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -590,6 +586,12 @@ static const struct gpu_ops vgpu_gp10b_ops = { .bar1_map_userd = vgpu_mm_bar1_map_userd, .vm_as_alloc_share = vgpu_vm_as_alloc_share, .vm_as_free_share = vgpu_vm_as_free_share, + .cache = { + .fb_flush = vgpu_mm_fb_flush, + .l2_invalidate = vgpu_mm_l2_invalidate, + .l2_flush = vgpu_mm_l2_flush, + .cbc_clean = NULL, + }, }, .pramin = { .data032_r = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 11769ee5f..022706e4b 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -663,10 +663,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .gmmu_map = vgpu_locked_gmmu_map, .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .fb_flush = vgpu_mm_fb_flush, - .l2_invalidate = vgpu_mm_l2_invalidate, - .l2_flush = vgpu_mm_l2_flush, - .cbc_clean = NULL, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -683,6 +679,12 @@ static const struct gpu_ops vgpu_gv11b_ops = { .bar1_map_userd = vgpu_mm_bar1_map_userd, .vm_as_alloc_share = 
vgpu_vm_as_alloc_share, .vm_as_free_share = vgpu_vm_as_free_share, + .cache = { + .fb_flush = vgpu_mm_fb_flush, + .l2_invalidate = vgpu_mm_l2_invalidate, + .l2_flush = vgpu_mm_l2_flush, + .cbc_clean = NULL, + }, }, .therm = { .init_therm_setup_hw = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 51e4f5bed..c90b6f9b4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2158,7 +2158,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, goto cleanup; } - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); goto cleanup; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index af6f36e18..f6bc19255 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -51,7 +51,6 @@ #include #include -#include /* * GPU mapping life cycle @@ -114,7 +113,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) } } - if ((gk20a_mm_fb_flush(g) != 0) || (gk20a_mm_fb_flush(g) != 0)) { + if (g->ops.mm.cache.fb_flush(g) != 0 || + g->ops.mm.cache.fb_flush(g) != 0) { return -EBUSY; } @@ -406,234 +406,6 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) return 0; } -int gk20a_mm_fb_flush(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - int ret = 0; - struct nvgpu_timeout timeout; - u32 retries; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - gk20a_idle_nosuspend(g); - return 0; - } - - retries = 100; - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - /* Make sure all previous writes are committed to the L2. There's no - guarantee that writes are to DRAM. This will be a sysmembar internal - to the L2. 
*/ - - trace_gk20a_mm_fb_flush(g->name); - - gk20a_writel(g, flush_fb_flush_r(), - flush_fb_flush_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_fb_flush_r()); - - if (flush_fb_flush_outstanding_v(data) == - flush_fb_flush_outstanding_true_v() || - flush_fb_flush_pending_v(data) == - flush_fb_flush_pending_busy_v()) { - nvgpu_log_info(g, "fb_flush 0x%x", data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - if (g->ops.fb.dump_vpr_info != NULL) { - g->ops.fb.dump_vpr_info(g); - } - if (g->ops.fb.dump_wpr_info != NULL) { - g->ops.fb.dump_wpr_info(g); - } - ret = -EBUSY; - } - - trace_gk20a_mm_fb_flush_done(g->name); - - nvgpu_mutex_release(&mm->l2_op_lock); - - gk20a_idle_nosuspend(g); - - return ret; -} - -static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) -{ - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 200; - - trace_gk20a_mm_l2_invalidate(g->name); - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - /* Invalidate any clean lines from the L2 so subsequent reads go to - DRAM. Dirty lines are not affected by this operation. 
*/ - gk20a_writel(g, flush_l2_system_invalidate_r(), - flush_l2_system_invalidate_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_system_invalidate_r()); - - if (flush_l2_system_invalidate_outstanding_v(data) == - flush_l2_system_invalidate_outstanding_true_v() || - flush_l2_system_invalidate_pending_v(data) == - flush_l2_system_invalidate_pending_busy_v()) { - nvgpu_log_info(g, "l2_system_invalidate 0x%x", - data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_warn(g, "l2_system_invalidate too many retries"); - } - - trace_gk20a_mm_l2_invalidate_done(g->name); -} - -void gk20a_mm_l2_invalidate(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - gk20a_busy_noresume(g); - if (g->power_on) { - nvgpu_mutex_acquire(&mm->l2_op_lock); - gk20a_mm_l2_invalidate_locked(g); - nvgpu_mutex_release(&mm->l2_op_lock); - } - gk20a_idle_nosuspend(g); -} - -int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 2000; - int err = -ETIMEDOUT; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - goto hw_was_off; - } - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - trace_gk20a_mm_l2_flush(g->name); - - /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 - as clean, so subsequent reads might hit in the L2. 
*/ - gk20a_writel(g, flush_l2_flush_dirty_r(), - flush_l2_flush_dirty_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_flush_dirty_r()); - - if (flush_l2_flush_dirty_outstanding_v(data) == - flush_l2_flush_dirty_outstanding_true_v() || - flush_l2_flush_dirty_pending_v(data) == - flush_l2_flush_dirty_pending_busy_v()) { - nvgpu_log_info(g, "l2_flush_dirty 0x%x", data); - nvgpu_udelay(5); - } else { - err = 0; - break; - } - } while (nvgpu_timeout_expired_msg(&timeout, - "l2_flush_dirty too many retries") == 0); - - trace_gk20a_mm_l2_flush_done(g->name); - - if (invalidate) { - gk20a_mm_l2_invalidate_locked(g); - } - - nvgpu_mutex_release(&mm->l2_op_lock); - -hw_was_off: - gk20a_idle_nosuspend(g); - - return err; -} - -void gk20a_mm_cbc_clean(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 200; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - goto hw_was_off; - } - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - /* Flush all dirty lines from the CBC to L2 */ - gk20a_writel(g, flush_l2_clean_comptags_r(), - flush_l2_clean_comptags_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_clean_comptags_r()); - - if (flush_l2_clean_comptags_outstanding_v(data) == - flush_l2_clean_comptags_outstanding_true_v() || - flush_l2_clean_comptags_pending_v(data) == - flush_l2_clean_comptags_pending_busy_v()) { - nvgpu_log_info(g, "l2_clean_comptags 0x%x", data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired_msg(&timeout, - "l2_clean_comptags too many retries") == 0); - - nvgpu_mutex_release(&mm->l2_op_lock); - -hw_was_off: - gk20a_idle_nosuspend(g); -} - u32 gk20a_mm_get_iommu_bit(struct gk20a *g) { return 34; @@ -656,4 +428,3 @@ u64 
gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) gk20a_mem_flag_none, false, mem->aperture); } - diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b7749a897..0426bd912 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -72,11 +72,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node) struct gk20a; struct channel_gk20a; -int gk20a_mm_fb_flush(struct gk20a *g); -int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); -void gk20a_mm_cbc_clean(struct gk20a *g); -void gk20a_mm_l2_invalidate(struct gk20a *g); - #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) void gk20a_mm_ltc_isr(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index be9a39270..40c4e0193 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -46,6 +46,7 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" @@ -806,10 +807,6 @@ static const struct gpu_ops gm20b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gk20a_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gm20b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -822,6 +819,12 @@ static const struct gpu_ops gm20b_ops = { .get_kind_invalid = gm20b_get_kind_invalid, .get_kind_pitch = gm20b_get_kind_pitch, .bar1_map_userd = gk20a_mm_bar1_map_userd, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gk20a_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .therm = { .init_therm_setup_hw = gm20b_init_therm_setup_hw, diff --git 
a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index a5000410a..857bb8cfe 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -47,6 +47,7 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/bus/bus_gk20a.h" @@ -906,10 +907,6 @@ static const struct gpu_ops gp10b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gk20a_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -924,6 +921,12 @@ static const struct gpu_ops gp10b_ops = { .get_kind_invalid = gm20b_get_kind_invalid, .get_kind_pitch = gm20b_get_kind_pitch, .bar1_map_userd = gk20a_mm_bar1_map_userd, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gk20a_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index fce669fe9..a041aa45d 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1088,10 +1090,6 @@ static const struct gpu_ops gv100_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1108,6 +1106,12 @@ static const struct gpu_ops gv100_ops = { .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .get_flush_retries = gv100_mm_get_flush_retries, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f740101a7..c2c76753c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -30,6 +30,8 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1064,10 +1066,6 @@ static const struct gpu_ops gv11b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1084,6 +1082,12 @@ static const struct gpu_ops gv11b_ops = { 
.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy, .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .therm = { .init_therm_setup_hw = gv11b_init_therm_setup_hw, diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index 0932a6d3a..a0895e446 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c @@ -200,40 +200,6 @@ int gv11b_init_mm_setup_hw(struct gk20a *g) return err; } -int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate) -{ - int err = 0; - - nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush"); - - err = g->ops.mm.fb_flush(g); - if (err != 0) { - nvgpu_err(g, "mm.fb_flush()[1] failed err=%d", err); - return err; - } - err = gk20a_mm_l2_flush(g, invalidate); - if (err != 0) { - nvgpu_err(g, "gk20a_mm_l2_flush failed"); - return err; - } - if (g->ops.bus.bar1_bind != NULL) { - err = g->ops.fb.tlb_invalidate(g, - g->mm.bar1.vm->pdb.mem); - if (err != 0) { - nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err); - return err; - } - } else { - err = g->ops.mm.fb_flush(g); - if (err != 0) { - nvgpu_err(g, "mm.fb_flush()[2] failed err=%d", err); - return err; - } - } - - return err; -} - /* * On Volta the GPU determines whether to do L3 allocation for a mapping by * checking bit 36 of the phsyical address. 
So if a mapping should allocte lines diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h index bca67f083..d9d1fe0c0 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h @@ -32,7 +32,6 @@ bool gv11b_mm_is_bar1_supported(struct gk20a *g); void gv11b_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); int gv11b_init_mm_setup_hw(struct gk20a *g); -int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate); u64 gv11b_gpu_phys_addr(struct gk20a *g, struct nvgpu_gmmu_attrs *attrs, u64 phys); void gv11b_mm_fault_info_mem_destroy(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c index 58575848e..0d62f99de 100644 --- a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c @@ -61,7 +61,7 @@ void gp10b_ltc_intr_handle_lts_interrupts(struct gk20a *g, u32 ltc, u32 slice) nvgpu_writel_check(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset, ecc_stats_reg_val); - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } } diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c new file mode 100644 index 000000000..a51fd6dd5 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include + +#include + +#include "flush_gk20a.h" + +int gk20a_mm_fb_flush(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + int ret = 0; + struct nvgpu_timeout timeout; + u32 retries; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + gk20a_idle_nosuspend(g); + return 0; + } + + retries = 100; + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + /* Make sure all previous writes are committed to the L2. There's no + guarantee that writes are to DRAM. This will be a sysmembar internal + to the L2. 
*/ + + trace_gk20a_mm_fb_flush(g->name); + + nvgpu_writel(g, flush_fb_flush_r(), + flush_fb_flush_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_fb_flush_r()); + + if (flush_fb_flush_outstanding_v(data) == + flush_fb_flush_outstanding_true_v() || + flush_fb_flush_pending_v(data) == + flush_fb_flush_pending_busy_v()) { + nvgpu_log_info(g, "fb_flush 0x%x", data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + if (g->ops.fb.dump_vpr_info != NULL) { + g->ops.fb.dump_vpr_info(g); + } + if (g->ops.fb.dump_wpr_info != NULL) { + g->ops.fb.dump_wpr_info(g); + } + ret = -EBUSY; + } + + trace_gk20a_mm_fb_flush_done(g->name); + + nvgpu_mutex_release(&mm->l2_op_lock); + + gk20a_idle_nosuspend(g); + + return ret; +} + +static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) +{ + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 200; + + trace_gk20a_mm_l2_invalidate(g->name); + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + /* Invalidate any clean lines from the L2 so subsequent reads go to + DRAM. Dirty lines are not affected by this operation. 
*/ + nvgpu_writel(g, flush_l2_system_invalidate_r(), + flush_l2_system_invalidate_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_system_invalidate_r()); + + if (flush_l2_system_invalidate_outstanding_v(data) == + flush_l2_system_invalidate_outstanding_true_v() || + flush_l2_system_invalidate_pending_v(data) == + flush_l2_system_invalidate_pending_busy_v()) { + nvgpu_log_info(g, "l2_system_invalidate 0x%x", + data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_warn(g, "l2_system_invalidate too many retries"); + } + + trace_gk20a_mm_l2_invalidate_done(g->name); +} + +void gk20a_mm_l2_invalidate(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + gk20a_busy_noresume(g); + if (g->power_on) { + nvgpu_mutex_acquire(&mm->l2_op_lock); + gk20a_mm_l2_invalidate_locked(g); + nvgpu_mutex_release(&mm->l2_op_lock); + } + gk20a_idle_nosuspend(g); +} + +int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 2000; + int err = -ETIMEDOUT; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + goto hw_was_off; + } + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + trace_gk20a_mm_l2_flush(g->name); + + /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 + as clean, so subsequent reads might hit in the L2. 
*/ + nvgpu_writel(g, flush_l2_flush_dirty_r(), + flush_l2_flush_dirty_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_flush_dirty_r()); + + if (flush_l2_flush_dirty_outstanding_v(data) == + flush_l2_flush_dirty_outstanding_true_v() || + flush_l2_flush_dirty_pending_v(data) == + flush_l2_flush_dirty_pending_busy_v()) { + nvgpu_log_info(g, "l2_flush_dirty 0x%x", data); + nvgpu_udelay(5); + } else { + err = 0; + break; + } + } while (nvgpu_timeout_expired_msg(&timeout, + "l2_flush_dirty too many retries") == 0); + + trace_gk20a_mm_l2_flush_done(g->name); + + if (invalidate) { + gk20a_mm_l2_invalidate_locked(g); + } + + nvgpu_mutex_release(&mm->l2_op_lock); + +hw_was_off: + gk20a_idle_nosuspend(g); + + return err; +} + +void gk20a_mm_cbc_clean(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 200; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + goto hw_was_off; + } + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + /* Flush all dirty lines from the CBC to L2 */ + nvgpu_writel(g, flush_l2_clean_comptags_r(), + flush_l2_clean_comptags_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_clean_comptags_r()); + + if (flush_l2_clean_comptags_outstanding_v(data) == + flush_l2_clean_comptags_outstanding_true_v() || + flush_l2_clean_comptags_pending_v(data) == + flush_l2_clean_comptags_pending_busy_v()) { + nvgpu_log_info(g, "l2_clean_comptags 0x%x", data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired_msg(&timeout, + "l2_clean_comptags too many retries") == 0); + + nvgpu_mutex_release(&mm->l2_op_lock); + +hw_was_off: + gk20a_idle_nosuspend(g); +} diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h new 
file mode 100644 index 000000000..f157e7898 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef HAL_MM_FLUSH_FLUSH_GK20A_H +#define HAL_MM_FLUSH_FLUSH_GK20A_H + +#include + +struct gk20a; + +int gk20a_mm_fb_flush(struct gk20a *g); +int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); +void gk20a_mm_cbc_clean(struct gk20a *g); +void gk20a_mm_l2_invalidate(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c new file mode 100644 index 000000000..0ee040b8a --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <nvgpu/log.h>
+
+#include <nvgpu/gk20a.h>
+
+#include "flush_gk20a.h"
+#include "flush_gv11b.h"
+
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush");
+
+	err = g->ops.mm.cache.fb_flush(g);
+	if (err != 0) {
+		nvgpu_err(g, "mm.cache.fb_flush()[1] failed err=%d", err);
+		return err;
+	}
+	err = gk20a_mm_l2_flush(g, invalidate);
+	if (err != 0) {
+		nvgpu_err(g, "gk20a_mm_l2_flush failed");
+		return err;
+	}
+	if (g->ops.bus.bar1_bind != NULL) {
+		err = g->ops.fb.tlb_invalidate(g, g->mm.bar1.vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+			return err;
+		}
+	} else {
+		err = g->ops.mm.cache.fb_flush(g);
+		if (err != 0) {
+			nvgpu_err(g, "mm.cache.fb_flush()[2] failed err=%d",
+				err);
+			return err;
+		}
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
new file mode 100644
index 000000000..c0ff42532
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_FLUSH_FLUSH_GV11B_H
+#define HAL_MM_FLUSH_FLUSH_GV11B_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate);
+
+#endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 86ea6bc22..d5cd9562a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1363,10 +1363,6 @@ struct gpu_ops {
 				struct vm_gk20a_mapping_batch *batch);
 		int (*vm_bind_channel)(struct vm_gk20a *vm,
 				struct channel_gk20a *ch);
-		int (*fb_flush)(struct gk20a *g);
-		void (*l2_invalidate)(struct gk20a *g);
-		int (*l2_flush)(struct gk20a *g, bool invalidate);
-		void (*cbc_clean)(struct gk20a *g);
 		u32 (*get_big_page_sizes)(void);
 		u32 (*get_default_big_page_size)(void);
 		u32 (*get_iommu_bit)(struct gk20a *g);
@@ -1391,6 +1387,12 @@
 		u64 (*bar1_map_userd)(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
 		int (*vm_as_alloc_share)(struct gk20a *g, struct vm_gk20a *vm);
 		void (*vm_as_free_share)(struct vm_gk20a *vm);
+		struct {
+			int (*fb_flush)(struct gk20a *g);
+			void (*l2_invalidate)(struct gk20a *g);
+			int (*l2_flush)(struct gk20a *g, bool invalidate);
+			void (*cbc_clean)(struct gk20a *g);
+		} cache;
 	} mm;
 	/*
 	 * This function is called to allocate secure memory (memory
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export
index 6433009f2..b06247574 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv.export
@@ -31,6 +31,7 @@ gk20a_runlist_get_tsg_entry
 gk20a_locked_gmmu_map
 gk20a_locked_gmmu_unmap
 gk20a_ramin_alloc_size
+gk20a_mm_fb_flush
 gm20b_fb_tlb_invalidate
 gm20b_fuse_status_opt_gpc
gm20b_ramin_set_big_page_size diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 470e6d4e3..a733ab2e5 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -609,7 +609,8 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, return -EINVAL; if (args->l2_flush) { - err = g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); + err = g->ops.mm.cache.l2_flush(g, args->l2_invalidate ? + true : false); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -617,7 +618,7 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, } if (args->fb_flush) { - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); } return err; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 576bef22a..348a5ed96 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1126,10 +1128,6 @@ static const struct gpu_ops tu104_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1146,6 +1144,12 @@ static const struct gpu_ops tu104_ops = { .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .get_flush_retries = tu104_mm_get_flush_retries, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = 
{ .data032_r = pram_data032_r, diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c index 1e9800bae..bcdc1e264 100644 --- a/userspace/units/mm/gmmu/page_table/page_table.c +++ b/userspace/units/mm/gmmu/page_table/page_table.c @@ -41,6 +41,8 @@ #include #include +#include +#include #include #include #include @@ -303,6 +305,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap; g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; g->ops.mm.is_bar1_supported = gv11b_mm_is_bar1_supported; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c index b86d60983..7df3c80a3 100644 --- a/userspace/units/mm/page_table_faults/page_table_faults.c +++ b/userspace/units/mm/page_table_faults/page_table_faults.c @@ -43,12 +43,14 @@ #include "nvgpu/hw/gv11b/hw_gmmu_gv11b.h" #include "nvgpu/hw/gv11b/hw_fb_gv11b.h" +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gv11b.h" #include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gv11b.h" -#include "hal/fifo/ramin_gk20a.h" #include "hal/fb/intr/fb_intr_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" @@ -133,7 +135,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.mm.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy; g->ops.mm.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw; g->ops.mm.init_mm_setup_hw = gv11b_init_mm_setup_hw; - g->ops.mm.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + 
g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; g->ops.fb.init_hw = gv11b_fb_init_hw; g->ops.fb.intr.enable = gv11b_fb_intr_enable; g->ops.fb.fault_buf_configure_hw = gv11b_fb_fault_buf_configure_hw; diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c index 271d9214c..615928bb6 100644 --- a/userspace/units/mm/vm/vm.c +++ b/userspace/units/mm/vm/vm.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -137,6 +139,8 @@ static int init_test_env(struct unit_module *m, struct gk20a *g) g->ops.mm.gmmu_map = gk20a_locked_gmmu_map; g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap; g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; return UNIT_SUCCESS; }