gpu: nvgpu: optimize fecs status polling

bug 200078367

Using udelay() for FECS status polling
during the GR init phase brings the FECS
transaction time down to < 20 usec from a
few hundred usec.

Change-Id: I61a27daaf1187ac086a42779b46aa3fbee3b37f2
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/691918
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Vijayakumar
2015-02-10 15:18:54 +05:30
committed by Dan Willemsen
parent dbc46f0bf2
commit aa96b6bd1e
3 changed files with 32 additions and 23 deletions

View File

@@ -365,16 +365,19 @@ static int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long end_jiffies,
static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
u32 *mailbox_ret, u32 opc_success,
u32 mailbox_ok, u32 opc_fail,
u32 mailbox_fail)
u32 mailbox_fail, bool sleepduringwait)
{
unsigned long end_jiffies = jiffies +
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
u32 delay = GR_IDLE_CHECK_DEFAULT;
u32 delay = GR_FECS_POLL_INTERVAL;
u32 check = WAIT_UCODE_LOOP;
u32 reg;
gk20a_dbg_fn("");
if (sleepduringwait)
delay = GR_IDLE_CHECK_DEFAULT;
while (check == WAIT_UCODE_LOOP) {
if (!time_before(jiffies, end_jiffies) &&
tegra_platform_is_silicon())
@@ -448,8 +451,11 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
break;
}
usleep_range(delay, delay * 2);
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
if (sleepduringwait) {
usleep_range(delay, delay * 2);
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
} else
udelay(delay);
}
if (check == WAIT_UCODE_TIMEOUT) {
@@ -472,7 +478,8 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
* We should replace most, if not all, fecs method calls to this instead. */
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
struct fecs_method_op_gk20a op)
struct fecs_method_op_gk20a op,
bool sleepduringwait)
{
struct gr_gk20a *gr = &g->gr;
int ret;
@@ -497,7 +504,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
op.cond.ok, op.mailbox.ok,
op.cond.fail, op.mailbox.fail);
op.cond.fail, op.mailbox.fail,
sleepduringwait);
mutex_unlock(&gr->fecs_mutex);
@@ -515,7 +523,7 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
.fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
.cond.ok = GR_IS_UCODE_OP_EQUAL,
.cond.fail = GR_IS_UCODE_OP_EQUAL });
.cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
}
/* Stop processing (stall) context switches at FECS.
@@ -548,7 +556,7 @@ int gr_gk20a_halt_pipe(struct gk20a *g)
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
.fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
.cond.ok = GR_IS_UCODE_OP_EQUAL,
.cond.fail = GR_IS_UCODE_OP_EQUAL });
.cond.fail = GR_IS_UCODE_OP_EQUAL }, false);
}
@@ -686,7 +694,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
.ok = 0x10,
.fail = 0x20, },
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND});
.cond.fail = GR_IS_UCODE_OP_AND}, true);
if (ret)
gk20a_err(dev_from_gk20a(g),
"bind channel instance failed");
@@ -1382,7 +1390,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
},
.cond.ok = GR_IS_UCODE_OP_AND,
.cond.fail = GR_IS_UCODE_OP_AND,
});
}, true);
if (ret)
gk20a_err(dev_from_gk20a(g), "save context image failed");
@@ -1668,7 +1676,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
.fail = 0},
.cond.ok = GR_IS_UCODE_OP_EQUAL,
.cond.fail = GR_IS_UCODE_OP_SKIP});
.cond.fail = GR_IS_UCODE_OP_SKIP}, false);
if (ret)
gk20a_err(dev_from_gk20a(g),
@@ -2145,7 +2153,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL,
GR_IS_UCODE_OP_EQUAL,
eUcodeHandshakeInitComplete,
GR_IS_UCODE_OP_SKIP, 0);
GR_IS_UCODE_OP_SKIP, 0, false);
if (ret) {
gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout");
return ret;
@@ -2181,7 +2189,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
op.method.addr =
gr_fecs_method_push_adr_discover_image_size_v();
op.mailbox.ret = &g->gr.ctx_vars.golden_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op);
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret) {
gk20a_err(dev_from_gk20a(g),
"query golden image size failed");
@@ -2190,7 +2198,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
op.method.addr =
gr_fecs_method_push_adr_discover_zcull_image_size_v();
op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op);
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret) {
gk20a_err(dev_from_gk20a(g),
"query zcull ctx image size failed");
@@ -2199,7 +2207,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
op.method.addr =
gr_fecs_method_push_adr_discover_pm_image_size_v();
op.mailbox.ret = &pm_ctx_image_size;
ret = gr_gk20a_submit_fecs_method_op(g, op);
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
if (ret) {
gk20a_err(dev_from_gk20a(g),
"query pm ctx image size failed");
@@ -5798,7 +5806,7 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
.mailbox.ok = 0,
.cond.fail = GR_IS_UCODE_OP_SKIP,
.mailbox.fail = 0});
.mailbox.fail = 0}, false);
}
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
@@ -5816,7 +5824,7 @@ int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
.cond.ok = GR_IS_UCODE_OP_EQUAL,
.mailbox.ok = 1,
.cond.fail = GR_IS_UCODE_OP_SKIP,
.mailbox.fail = 0});
.mailbox.fail = 0}, false);
}
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
@@ -5832,7 +5840,7 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
.cond.ok = GR_IS_UCODE_OP_EQUAL,
.mailbox.ok = 1,
.cond.fail = GR_IS_UCODE_OP_SKIP,
.mailbox.fail = 0});
.mailbox.fail = 0}, false);
}
int gk20a_gr_suspend(struct gk20a *g)

View File

@@ -28,6 +28,7 @@
#define GR_IDLE_CHECK_DEFAULT 100 /* usec */
#define GR_IDLE_CHECK_MAX 5000 /* usec */
#define GR_FECS_POLL_INTERVAL 5 /* usec */
#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
#define INVALID_MAX_WAYS 0xFFFFFFFF
@@ -491,7 +492,8 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
u32 expect_delay);
int gr_gk20a_init_ctx_state(struct gk20a *g);
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
struct fecs_method_op_gk20a op);
struct fecs_method_op_gk20a op,
bool sleepduringwait);
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
u32 class, u32 padding);

View File

@@ -1,7 +1,7 @@
/*
* GM20B GPC MMU
*
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -672,7 +672,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
{
unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
unsigned long delay = GR_IDLE_CHECK_DEFAULT;
unsigned long delay = GR_FECS_POLL_INTERVAL;
u32 reg;
gk20a_dbg_fn("");
@@ -681,8 +681,7 @@ static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
if (reg == val)
return 0;
usleep_range(delay, delay * 2);
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
udelay(delay);
} while (time_before(jiffies, end_jiffies) ||
!tegra_platform_is_silicon());