Poll the L2 (LTC) CBC-control and ELPG-flush registers with a bounded
udelay(5) retry loop instead of a jiffies deadline with usleep_range(), and
add start/done tracepoints around both operations.

Review notes:
 - gk20a_ltc_cbc_ctrl() now declares the retry counter as s32, as the gm20b
   variant in this patch already does. With u32, "retry >= 0" is always true
   and "retry < 0" never is, so the loop could not time out on silicon and
   the "comp tag clear timeout" error was unreachable.
 - gk20a_ltc_cbc_ctrl_start prints the u32 cbc_ctrl field with %u.
 - Header names on the "#include" lines were missing from the reviewed copy.
   The added include is assumed to be <trace/events/gk20a.h> (the header this
   patch extends) -- confirm. Context "#include" lines are left as found.
 - The "index" lines were not regenerated after the fixes above.

diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index 4359f2827..1a780212b 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -3,7 +3,7 @@
  *
  * GK20A Graphics
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -19,6 +19,7 @@
  */
 
 #include
+#include <trace/events/gk20a.h>
 
 #include "hw_ltc_gk20a.h"
 #include "hw_proj_gk20a.h"
@@ -107,15 +108,15 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
 	u32 fbp, slice, ctrl1, val, hw_op = 0;
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	s32 retry = 200;
 	u32 slices_per_fbp =
 		ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
 			gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
+
 	if (gr->compbit_store.size == 0)
 		return 0;
 
@@ -141,25 +142,23 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	for (fbp = 0; fbp < gr->num_fbps; fbp++) {
 		for (slice = 0; slice < slices_per_fbp; slice++) {
 
-			delay = GR_IDLE_CHECK_DEFAULT;
 
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
 				fbp * proj_ltc_stride_v() +
 				slice * proj_lts_stride_v();
 
+			retry = 200;
 			do {
 				val = gk20a_readl(g, ctrl1);
 				if (!(val & hw_op))
 					break;
+				retry--;
+				udelay(5);
 
-				usleep_range(delay, delay * 2);
-				delay = min_t(u32, delay << 1,
-					GR_IDLE_CHECK_MAX);
-
-			} while (time_before(jiffies, end_jiffies) ||
+			} while (retry >= 0 ||
 					!tegra_platform_is_silicon());
 
-			if (!time_before(jiffies, end_jiffies)) {
+			if (retry < 0 && tegra_platform_is_silicon()) {
 				gk20a_err(dev_from_gk20a(g),
 					   "comp tag clear timeout\n");
 				err = -EBUSY;
@@ -168,6 +167,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		}
 	}
 out:
+	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
 	mutex_unlock(&g->mm.l2_op_lock);
 	return 0;
 }
@@ -200,6 +200,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
+
 	/* Make sure all previous writes are committed to the L2. There's no
 	   guarantee that writes are to DRAM. This will be a sysmembar internal
 	   to the L2. */
@@ -212,7 +214,7 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
 			ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
 			gk20a_dbg_info("g_elpg_flush 0x%x", data);
 			retry--;
-			usleep_range(20, 40);
+			udelay(5);
 		} else
 			break;
 	} while (retry >= 0 || !tegra_platform_is_silicon());
@@ -221,6 +223,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
 		gk20a_warn(dev_from_gk20a(g),
 			    "g_elpg_flush too many retries");
 
+	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
+
 }
 
 static int gk20a_determine_L2_size_bytes(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 0a0efe414..522cd1dcf 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B L2
  *
- * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -15,6 +15,7 @@
 
 #include
 #include
+#include <trace/events/gk20a.h>
 
 #include "hw_mc_gm20b.h"
 #include "hw_ltc_gm20b.h"
@@ -26,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/gk20a_allocator.h"
 
+
 static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 {
 	/* max memory size (MB) to cover */
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
 	u32 ltc, slice, ctrl1, val, hw_op = 0;
-	unsigned long end_jiffies = jiffies +
-		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
+	s32 retry = 200;
 	u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
 				gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
+
 	if (gr->compbit_store.size == 0)
 		return 0;
 
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	for (ltc = 0; ltc < g->ltc_count; ltc++) {
 		for (slice = 0; slice < slices_per_ltc; slice++) {
 
-			delay = GR_IDLE_CHECK_DEFAULT;
-
 			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
 				ltc * proj_ltc_stride_v() +
 				slice * proj_lts_stride_v();
 
+			retry = 200;
 			do {
 				val = gk20a_readl(g, ctrl1);
 				if (!(val & hw_op))
 					break;
+				retry--;
+				udelay(5);
 
-				usleep_range(delay, delay * 2);
-				delay = min_t(u32, delay << 1,
-					GR_IDLE_CHECK_MAX);
-
-			} while (time_before(jiffies, end_jiffies) |
+			} while (retry >= 0 ||
 					!tegra_platform_is_silicon());
 
-			if (!time_before(jiffies, end_jiffies)) {
+			if (retry < 0 && tegra_platform_is_silicon()) {
 				gk20a_err(dev_from_gk20a(g),
 					   "comp tag clear timeout\n");
 				err = -EBUSY;
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		}
 	}
 out:
+	trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
 	mutex_unlock(&g->mm.l2_op_lock);
 	return 0;
 }
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
+	trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
+
 	for (i = 0; i < g->ltc_count; i++)
 		done[i] = 0;
 
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 
 		if (num_done < g->ltc_count) {
 			retry--;
-			usleep_range(20, 40);
+			udelay(5);
 		} else
 			break;
 	} while (retry >= 0 || !tegra_platform_is_silicon());
 
-	if (retry < 0)
+	if (retry < 0 && tegra_platform_is_silicon())
 		gk20a_warn(dev_from_gk20a(g),
 			    "g_elpg_flush too many retries");
+
+	trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
 }
 
 u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 096b05591..ad738f437 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -130,6 +130,16 @@ DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method,
 	TP_ARGS(name)
 );
 
+DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
+DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done,
+	TP_PROTO(const char *name),
+	TP_ARGS(name)
+);
+
 TRACE_EVENT(gk20a_channel_update,
 	TP_PROTO(const void *channel),
 	TP_ARGS(channel),
@@ -368,6 +378,43 @@ TRACE_EVENT(gk20a_mmu_fault,
 		__entry->engine, __entry->client, __entry->fault_type)
 );
 
+TRACE_EVENT(gk20a_ltc_cbc_ctrl_start,
+	TP_PROTO(const char *name, u32 cbc_ctrl, u32 min_value,
+		u32 max_value),
+	TP_ARGS(name, cbc_ctrl, min_value, max_value),
+
+	TP_STRUCT__entry(
+		__field(const char *, name)
+		__field(u32, cbc_ctrl)
+		__field(u32, min_value)
+		__field(u32, max_value)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->cbc_ctrl = cbc_ctrl;
+		__entry->min_value = min_value;
+		__entry->max_value = max_value;
+	),
+
+	TP_printk("name=%s, cbc_ctrl=%u, min_value=%u, max_value=%u",
+		__entry->name, __entry->cbc_ctrl, __entry->min_value,
+		__entry->max_value)
+);
+
+TRACE_EVENT(gk20a_ltc_cbc_ctrl_done,
+	TP_PROTO(const char *name),
+	TP_ARGS(name),
+	TP_STRUCT__entry(
+		__field(const char *, name)
+	),
+	TP_fast_assign(
+		__entry->name = name;
+	),
+	TP_printk("name=%s ", __entry->name)
+
+);
+
 DECLARE_EVENT_CLASS(gk20a_cde,
 	TP_PROTO(const void *ctx),
 	TP_ARGS(ctx),