diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 9e19fa533..75775d57d 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a.h" #include "kind_gk20a.h" @@ -4998,6 +4999,8 @@ static int gr_gk20a_handle_sw_method(struct gk20a *g, u32 addr, { gk20a_dbg_fn(""); + trace_gr_gk20a_handle_sw_method(g->dev->name); + if (class_num == KEPLER_COMPUTE_A) { switch (offset << 2) { case NVA0C0_SET_SHADER_EXCEPTIONS: diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 9f9e756bc..86fea3a16 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c @@ -14,6 +14,7 @@ */ #include +#include #include "gk20a.h" #include "mc_gk20a.h" @@ -23,6 +24,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g) { u32 mc_intr_0; + trace_mc_gk20a_intr_stall(g->dev->name); + if (!g->power_on) return IRQ_NONE; @@ -37,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g) /* flush previous write */ gk20a_readl(g, mc_intr_en_0_r()); + trace_mc_gk20a_intr_stall_done(g->dev->name); + return IRQ_WAKE_THREAD; } @@ -67,6 +72,8 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); + trace_mc_gk20a_intr_thread_stall(g->dev->name); + mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); @@ -92,6 +99,8 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) /* flush previous write */ gk20a_readl(g, mc_intr_en_0_r()); + trace_mc_gk20a_intr_thread_stall_done(g->dev->name); + return IRQ_HANDLED; } diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 2874567c9..798b64050 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "gk20a.h" #include "mm_gk20a.h" @@ -2816,6 +2817,9 @@ int gk20a_mm_fb_flush(struct gk20a *g) /* Make sure all previous writes are committed to the L2. There's no guarantee that writes are to DRAM. This will be a sysmembar internal to the L2. */ + + trace_gk20a_mm_fb_flush(g->dev->name); + gk20a_writel(g, flush_fb_flush_r(), flush_fb_flush_pending_busy_f()); @@ -2828,7 +2832,7 @@ int gk20a_mm_fb_flush(struct gk20a *g) flush_fb_flush_pending_busy_v()) { gk20a_dbg_info("fb_flush 0x%x", data); retry--; - usleep_range(20, 40); + udelay(5); } else break; } while (retry >= 0 || !tegra_platform_is_silicon()); @@ -2839,6 +2843,8 @@ int gk20a_mm_fb_flush(struct gk20a *g) ret = -EBUSY; } + trace_gk20a_mm_fb_flush_done(g->dev->name); + mutex_unlock(&mm->l2_op_lock); return ret; @@ -2849,6 +2855,8 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) u32 data; s32 retry = 200; + trace_gk20a_mm_l2_invalidate(g->dev->name); + /* Invalidate any clean lines from the L2 so subsequent reads go to DRAM. Dirty lines are not affected by this operation. */ gk20a_writel(g, flush_l2_system_invalidate_r(), @@ -2864,7 +2872,7 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) gk20a_dbg_info("l2_system_invalidate 0x%x", data); retry--; - usleep_range(20, 40); + udelay(5); } else break; } while (retry >= 0 || !tegra_platform_is_silicon()); @@ -2872,6 +2880,8 @@ static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) if (retry < 0) gk20a_warn(dev_from_gk20a(g), "l2_system_invalidate too many retries"); + + trace_gk20a_mm_l2_invalidate_done(g->dev->name); } void gk20a_mm_l2_invalidate(struct gk20a *g) @@ -2900,6 +2910,8 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) mutex_lock(&mm->l2_op_lock); + trace_gk20a_mm_l2_flush(g->dev->name); + /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 as clean, so subsequent reads might hit in the L2. */ gk20a_writel(g, flush_l2_flush_dirty_r(), @@ -2914,7 +2926,7 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) flush_l2_flush_dirty_pending_busy_v()) { gk20a_dbg_info("l2_flush_dirty 0x%x", data); retry--; - usleep_range(20, 40); + udelay(5); } else break; } while (retry >= 0 || !tegra_platform_is_silicon()); @@ -2923,6 +2935,8 @@ void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) gk20a_warn(dev_from_gk20a(g), "l2_flush_dirty too many retries"); + trace_gk20a_mm_l2_flush_done(g->dev->name); + if (invalidate) gk20a_mm_l2_invalidate_locked(g); @@ -2964,7 +2978,7 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl) >> 12); u32 data; - s32 retry = 200; + s32 retry = 2000; static DEFINE_MUTEX(tlb_lock); gk20a_dbg_fn(""); @@ -2986,11 +3000,14 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) } mutex_lock(&tlb_lock); + + trace_gk20a_mm_tlb_invalidate(g->dev->name); + do { data = gk20a_readl(g, fb_mmu_ctrl_r()); if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0) break; - usleep_range(20, 40); + udelay(2); retry--; } while (retry >= 0 || !tegra_platform_is_silicon()); @@ -3014,13 +3031,15 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) fb_mmu_ctrl_pri_fifo_empty_false_f()) break; retry--; - usleep_range(20, 40); + udelay(2); } while (retry >= 0 || !tegra_platform_is_silicon()); if (retry < 0) gk20a_warn(dev_from_gk20a(g), "mmu invalidate too many retries"); + trace_gk20a_mm_tlb_invalidate_done(g->dev->name); + out: mutex_unlock(&tlb_lock); vm->tlb_dirty = false; diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index 32e3100a5..096b05591 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h @@ -1,7 +1,7 @@ /* * gk20a event logging to ftrace. * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -65,6 +65,71 @@ DEFINE_EVENT(gk20a, gk20a_gpfifo_submit_wait_for_space_done, TP_ARGS(name) ); +DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_l2_flush, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_l2_flush_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_fb_flush, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gk20a_mm_fb_flush_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, mc_gk20a_intr_stall, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, mc_gk20a_intr_stall_done, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method, + TP_PROTO(const char *name), + TP_ARGS(name) +); + TRACE_EVENT(gk20a_channel_update, TP_PROTO(const void *channel), TP_ARGS(channel),