mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: Use busy looping for flush operations
Use busy looping for l2 tag flush and elpg flush operations. This makes the total flush time more accurate and reduces the overall time compared with usleep. Also added trace points to measure the performance of these operations, and corrected the timeout error check for non-silicon platforms. Bug 200081799 Change-Id: I63410bb7528db9258501633996fbdee5fdec1c74 Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: http://git-master/r/710472 (cherry picked from commit 18684cf9d5d6870a1a1fd5711c4fc2d733caad20) Reviewed-on: http://git-master/r/710986 GVS: Gerrit_Virtual_Submit Reviewed-by: Yu-Huan Hsu <yhsu@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
5f6cc1289e
commit
ced17a2d31
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* GK20A Graphics
|
* GK20A Graphics
|
||||||
*
|
*
|
||||||
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -19,6 +19,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
|
#include <trace/events/gk20a.h>
|
||||||
|
|
||||||
#include "hw_ltc_gk20a.h"
|
#include "hw_ltc_gk20a.h"
|
||||||
#include "hw_proj_gk20a.h"
|
#include "hw_proj_gk20a.h"
|
||||||
@@ -107,15 +108,15 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 fbp, slice, ctrl1, val, hw_op = 0;
|
u32 fbp, slice, ctrl1, val, hw_op = 0;
|
||||||
unsigned long end_jiffies = jiffies +
|
u32 retry = 200;
|
||||||
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
||||||
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
|
||||||
u32 slices_per_fbp =
|
u32 slices_per_fbp =
|
||||||
ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
|
ltc_ltcs_ltss_cbc_param_slices_per_fbp_v(
|
||||||
gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
|
gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
|
||||||
|
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
|
||||||
|
|
||||||
if (gr->compbit_store.size == 0)
|
if (gr->compbit_store.size == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@@ -141,25 +142,23 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
for (fbp = 0; fbp < gr->num_fbps; fbp++) {
|
for (fbp = 0; fbp < gr->num_fbps; fbp++) {
|
||||||
for (slice = 0; slice < slices_per_fbp; slice++) {
|
for (slice = 0; slice < slices_per_fbp; slice++) {
|
||||||
|
|
||||||
delay = GR_IDLE_CHECK_DEFAULT;
|
|
||||||
|
|
||||||
ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
|
ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
|
||||||
fbp * proj_ltc_stride_v() +
|
fbp * proj_ltc_stride_v() +
|
||||||
slice * proj_lts_stride_v();
|
slice * proj_lts_stride_v();
|
||||||
|
|
||||||
|
retry = 200;
|
||||||
do {
|
do {
|
||||||
val = gk20a_readl(g, ctrl1);
|
val = gk20a_readl(g, ctrl1);
|
||||||
if (!(val & hw_op))
|
if (!(val & hw_op))
|
||||||
break;
|
break;
|
||||||
|
retry--;
|
||||||
|
udelay(5);
|
||||||
|
|
||||||
usleep_range(delay, delay * 2);
|
} while (retry >= 0 ||
|
||||||
delay = min_t(u32, delay << 1,
|
|
||||||
GR_IDLE_CHECK_MAX);
|
|
||||||
|
|
||||||
} while (time_before(jiffies, end_jiffies) ||
|
|
||||||
!tegra_platform_is_silicon());
|
!tegra_platform_is_silicon());
|
||||||
|
|
||||||
if (!time_before(jiffies, end_jiffies)) {
|
if (retry < 0 && tegra_platform_is_silicon()) {
|
||||||
gk20a_err(dev_from_gk20a(g),
|
gk20a_err(dev_from_gk20a(g),
|
||||||
"comp tag clear timeout\n");
|
"comp tag clear timeout\n");
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
@@ -168,6 +167,7 @@ static int gk20a_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
|
||||||
mutex_unlock(&g->mm.l2_op_lock);
|
mutex_unlock(&g->mm.l2_op_lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -200,6 +200,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
|
|||||||
|
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
|
||||||
|
|
||||||
/* Make sure all previous writes are committed to the L2. There's no
|
/* Make sure all previous writes are committed to the L2. There's no
|
||||||
guarantee that writes are to DRAM. This will be a sysmembar internal
|
guarantee that writes are to DRAM. This will be a sysmembar internal
|
||||||
to the L2. */
|
to the L2. */
|
||||||
@@ -212,7 +214,7 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
|
|||||||
ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
|
ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
|
||||||
gk20a_dbg_info("g_elpg_flush 0x%x", data);
|
gk20a_dbg_info("g_elpg_flush 0x%x", data);
|
||||||
retry--;
|
retry--;
|
||||||
usleep_range(20, 40);
|
udelay(5);
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
} while (retry >= 0 || !tegra_platform_is_silicon());
|
} while (retry >= 0 || !tegra_platform_is_silicon());
|
||||||
@@ -221,6 +223,8 @@ static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
|
|||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
"g_elpg_flush too many retries");
|
"g_elpg_flush too many retries");
|
||||||
|
|
||||||
|
trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gk20a_determine_L2_size_bytes(struct gk20a *g)
|
static int gk20a_determine_L2_size_bytes(struct gk20a *g)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* GM20B L2
|
* GM20B L2
|
||||||
*
|
*
|
||||||
* Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2014-2015 NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <linux/jiffies.h>
|
#include <linux/jiffies.h>
|
||||||
|
#include <trace/events/gk20a.h>
|
||||||
|
|
||||||
#include "hw_mc_gm20b.h"
|
#include "hw_mc_gm20b.h"
|
||||||
#include "hw_ltc_gm20b.h"
|
#include "hw_ltc_gm20b.h"
|
||||||
@@ -26,6 +27,7 @@
|
|||||||
#include "gk20a/gk20a.h"
|
#include "gk20a/gk20a.h"
|
||||||
#include "gk20a/gk20a_allocator.h"
|
#include "gk20a/gk20a_allocator.h"
|
||||||
|
|
||||||
|
|
||||||
static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
|
static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
|
||||||
{
|
{
|
||||||
/* max memory size (MB) to cover */
|
/* max memory size (MB) to cover */
|
||||||
@@ -107,14 +109,14 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
struct gr_gk20a *gr = &g->gr;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 ltc, slice, ctrl1, val, hw_op = 0;
|
u32 ltc, slice, ctrl1, val, hw_op = 0;
|
||||||
unsigned long end_jiffies = jiffies +
|
s32 retry = 200;
|
||||||
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
|
||||||
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
|
||||||
u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
|
u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(
|
||||||
gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
|
gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
|
||||||
|
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
trace_gk20a_ltc_cbc_ctrl_start(g->dev->name, op, min, max);
|
||||||
|
|
||||||
if (gr->compbit_store.size == 0)
|
if (gr->compbit_store.size == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
@@ -139,25 +141,22 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
for (ltc = 0; ltc < g->ltc_count; ltc++) {
|
||||||
for (slice = 0; slice < slices_per_ltc; slice++) {
|
for (slice = 0; slice < slices_per_ltc; slice++) {
|
||||||
|
|
||||||
delay = GR_IDLE_CHECK_DEFAULT;
|
|
||||||
|
|
||||||
ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
|
ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
|
||||||
ltc * proj_ltc_stride_v() +
|
ltc * proj_ltc_stride_v() +
|
||||||
slice * proj_lts_stride_v();
|
slice * proj_lts_stride_v();
|
||||||
|
|
||||||
|
retry = 200;
|
||||||
do {
|
do {
|
||||||
val = gk20a_readl(g, ctrl1);
|
val = gk20a_readl(g, ctrl1);
|
||||||
if (!(val & hw_op))
|
if (!(val & hw_op))
|
||||||
break;
|
break;
|
||||||
|
retry--;
|
||||||
|
udelay(5);
|
||||||
|
|
||||||
usleep_range(delay, delay * 2);
|
} while (retry >= 0 ||
|
||||||
delay = min_t(u32, delay << 1,
|
|
||||||
GR_IDLE_CHECK_MAX);
|
|
||||||
|
|
||||||
} while (time_before(jiffies, end_jiffies) |
|
|
||||||
!tegra_platform_is_silicon());
|
!tegra_platform_is_silicon());
|
||||||
|
|
||||||
if (!time_before(jiffies, end_jiffies)) {
|
if (retry < 0 && tegra_platform_is_silicon()) {
|
||||||
gk20a_err(dev_from_gk20a(g),
|
gk20a_err(dev_from_gk20a(g),
|
||||||
"comp tag clear timeout\n");
|
"comp tag clear timeout\n");
|
||||||
err = -EBUSY;
|
err = -EBUSY;
|
||||||
@@ -166,6 +165,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
trace_gk20a_ltc_cbc_ctrl_done(g->dev->name);
|
||||||
mutex_unlock(&g->mm.l2_op_lock);
|
mutex_unlock(&g->mm.l2_op_lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -232,6 +232,8 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
|
|||||||
|
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
trace_gk20a_mm_g_elpg_flush_locked(g->dev->name);
|
||||||
|
|
||||||
for (i = 0; i < g->ltc_count; i++)
|
for (i = 0; i < g->ltc_count; i++)
|
||||||
done[i] = 0;
|
done[i] = 0;
|
||||||
|
|
||||||
@@ -255,14 +257,16 @@ void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
|
|||||||
|
|
||||||
if (num_done < g->ltc_count) {
|
if (num_done < g->ltc_count) {
|
||||||
retry--;
|
retry--;
|
||||||
usleep_range(20, 40);
|
udelay(5);
|
||||||
} else
|
} else
|
||||||
break;
|
break;
|
||||||
} while (retry >= 0 || !tegra_platform_is_silicon());
|
} while (retry >= 0 || !tegra_platform_is_silicon());
|
||||||
|
|
||||||
if (retry < 0)
|
if (retry < 0 && tegra_platform_is_silicon())
|
||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
"g_elpg_flush too many retries");
|
"g_elpg_flush too many retries");
|
||||||
|
|
||||||
|
trace_gk20a_mm_g_elpg_flush_locked_done(g->dev->name);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
|
u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
|
||||||
|
|||||||
@@ -130,6 +130,16 @@ DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method,
|
|||||||
TP_ARGS(name)
|
TP_ARGS(name)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked,
|
||||||
|
TP_PROTO(const char *name),
|
||||||
|
TP_ARGS(name)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done,
|
||||||
|
TP_PROTO(const char *name),
|
||||||
|
TP_ARGS(name)
|
||||||
|
);
|
||||||
|
|
||||||
TRACE_EVENT(gk20a_channel_update,
|
TRACE_EVENT(gk20a_channel_update,
|
||||||
TP_PROTO(const void *channel),
|
TP_PROTO(const void *channel),
|
||||||
TP_ARGS(channel),
|
TP_ARGS(channel),
|
||||||
@@ -368,6 +378,43 @@ TRACE_EVENT(gk20a_mmu_fault,
|
|||||||
__entry->engine, __entry->client, __entry->fault_type)
|
__entry->engine, __entry->client, __entry->fault_type)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(gk20a_ltc_cbc_ctrl_start,
|
||||||
|
TP_PROTO(const char *name, u32 cbc_ctrl, u32 min_value,
|
||||||
|
u32 max_value),
|
||||||
|
TP_ARGS(name, cbc_ctrl, min_value, max_value),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(const char *, name)
|
||||||
|
__field(u32, cbc_ctrl)
|
||||||
|
__field(u32, min_value)
|
||||||
|
__field(u32, max_value)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->name = name;
|
||||||
|
__entry->cbc_ctrl = cbc_ctrl;
|
||||||
|
__entry->min_value = min_value;
|
||||||
|
__entry->max_value = max_value;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk("name=%s, cbc_ctrl=%d, min_value=%u, max_value=%u",
|
||||||
|
__entry->name, __entry->cbc_ctrl, __entry->min_value,
|
||||||
|
__entry->max_value)
|
||||||
|
);
|
||||||
|
|
||||||
|
TRACE_EVENT(gk20a_ltc_cbc_ctrl_done,
|
||||||
|
TP_PROTO(const char *name),
|
||||||
|
TP_ARGS(name),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(const char *, name)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->name = name;
|
||||||
|
),
|
||||||
|
TP_printk("name=%s ", __entry->name)
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(gk20a_cde,
|
DECLARE_EVENT_CLASS(gk20a_cde,
|
||||||
TP_PROTO(const void *ctx),
|
TP_PROTO(const void *ctx),
|
||||||
TP_ARGS(ctx),
|
TP_ARGS(ctx),
|
||||||
|
|||||||
Reference in New Issue
Block a user