mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: optimize fecs status polling
Bug 200078367

Using udelay() for FECS status polling during the GR init phase brings the FECS transaction time down from a few hundred usec to < 20 usec.

Change-Id: I61a27daaf1187ac086a42779b46aa3fbee3b37f2
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Reviewed-on: http://git-master/r/691918
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
dbc46f0bf2
commit
aa96b6bd1e
@@ -365,16 +365,19 @@ static int gr_gk20a_wait_fe_idle(struct gk20a *g, unsigned long end_jiffies,
|
||||
static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
|
||||
u32 *mailbox_ret, u32 opc_success,
|
||||
u32 mailbox_ok, u32 opc_fail,
|
||||
u32 mailbox_fail)
|
||||
u32 mailbox_fail, bool sleepduringwait)
|
||||
{
|
||||
unsigned long end_jiffies = jiffies +
|
||||
msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
|
||||
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
||||
u32 delay = GR_FECS_POLL_INTERVAL;
|
||||
u32 check = WAIT_UCODE_LOOP;
|
||||
u32 reg;
|
||||
|
||||
gk20a_dbg_fn("");
|
||||
|
||||
if (sleepduringwait)
|
||||
delay = GR_IDLE_CHECK_DEFAULT;
|
||||
|
||||
while (check == WAIT_UCODE_LOOP) {
|
||||
if (!time_before(jiffies, end_jiffies) &&
|
||||
tegra_platform_is_silicon())
|
||||
@@ -448,8 +451,11 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
|
||||
break;
|
||||
}
|
||||
|
||||
usleep_range(delay, delay * 2);
|
||||
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
||||
if (sleepduringwait) {
|
||||
usleep_range(delay, delay * 2);
|
||||
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
||||
} else
|
||||
udelay(delay);
|
||||
}
|
||||
|
||||
if (check == WAIT_UCODE_TIMEOUT) {
|
||||
@@ -472,7 +478,8 @@ static int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
|
||||
/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
|
||||
* We should replace most, if not all, fecs method calls to this instead. */
|
||||
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
|
||||
struct fecs_method_op_gk20a op)
|
||||
struct fecs_method_op_gk20a op,
|
||||
bool sleepduringwait)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
int ret;
|
||||
@@ -497,7 +504,8 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
|
||||
|
||||
ret = gr_gk20a_ctx_wait_ucode(g, op.mailbox.id, op.mailbox.ret,
|
||||
op.cond.ok, op.mailbox.ok,
|
||||
op.cond.fail, op.mailbox.fail);
|
||||
op.cond.fail, op.mailbox.fail,
|
||||
sleepduringwait);
|
||||
|
||||
mutex_unlock(&gr->fecs_mutex);
|
||||
|
||||
@@ -515,7 +523,7 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
|
||||
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
|
||||
.fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
|
||||
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
||||
.cond.fail = GR_IS_UCODE_OP_EQUAL });
|
||||
.cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
|
||||
}
|
||||
|
||||
/* Stop processing (stall) context switches at FECS.
|
||||
@@ -548,7 +556,7 @@ int gr_gk20a_halt_pipe(struct gk20a *g)
|
||||
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
|
||||
.fail = gr_fecs_ctxsw_mailbox_value_fail_v(), },
|
||||
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
||||
.cond.fail = GR_IS_UCODE_OP_EQUAL });
|
||||
.cond.fail = GR_IS_UCODE_OP_EQUAL }, false);
|
||||
}
|
||||
|
||||
|
||||
@@ -686,7 +694,7 @@ static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
|
||||
.ok = 0x10,
|
||||
.fail = 0x20, },
|
||||
.cond.ok = GR_IS_UCODE_OP_AND,
|
||||
.cond.fail = GR_IS_UCODE_OP_AND});
|
||||
.cond.fail = GR_IS_UCODE_OP_AND}, true);
|
||||
if (ret)
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"bind channel instance failed");
|
||||
@@ -1382,7 +1390,7 @@ static int gr_gk20a_fecs_ctx_image_save(struct channel_gk20a *c, u32 save_type)
|
||||
},
|
||||
.cond.ok = GR_IS_UCODE_OP_AND,
|
||||
.cond.fail = GR_IS_UCODE_OP_AND,
|
||||
});
|
||||
}, true);
|
||||
|
||||
if (ret)
|
||||
gk20a_err(dev_from_gk20a(g), "save context image failed");
|
||||
@@ -1668,7 +1676,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
|
||||
.fail = 0},
|
||||
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
||||
.cond.fail = GR_IS_UCODE_OP_SKIP});
|
||||
.cond.fail = GR_IS_UCODE_OP_SKIP}, false);
|
||||
|
||||
if (ret)
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
@@ -2145,7 +2153,7 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
|
||||
ret = gr_gk20a_ctx_wait_ucode(g, 0, NULL,
|
||||
GR_IS_UCODE_OP_EQUAL,
|
||||
eUcodeHandshakeInitComplete,
|
||||
GR_IS_UCODE_OP_SKIP, 0);
|
||||
GR_IS_UCODE_OP_SKIP, 0, false);
|
||||
if (ret) {
|
||||
gk20a_err(dev_from_gk20a(g), "falcon ucode init timeout");
|
||||
return ret;
|
||||
@@ -2181,7 +2189,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
|
||||
op.method.addr =
|
||||
gr_fecs_method_push_adr_discover_image_size_v();
|
||||
op.mailbox.ret = &g->gr.ctx_vars.golden_image_size;
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
|
||||
if (ret) {
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"query golden image size failed");
|
||||
@@ -2190,7 +2198,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
|
||||
op.method.addr =
|
||||
gr_fecs_method_push_adr_discover_zcull_image_size_v();
|
||||
op.mailbox.ret = &g->gr.ctx_vars.zcull_ctxsw_image_size;
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
|
||||
if (ret) {
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"query zcull ctx image size failed");
|
||||
@@ -2199,7 +2207,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
|
||||
op.method.addr =
|
||||
gr_fecs_method_push_adr_discover_pm_image_size_v();
|
||||
op.mailbox.ret = &pm_ctx_image_size;
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op);
|
||||
ret = gr_gk20a_submit_fecs_method_op(g, op, false);
|
||||
if (ret) {
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"query pm ctx image size failed");
|
||||
@@ -5798,7 +5806,7 @@ int gr_gk20a_fecs_get_reglist_img_size(struct gk20a *g, u32 *size)
|
||||
.cond.ok = GR_IS_UCODE_OP_NOT_EQUAL,
|
||||
.mailbox.ok = 0,
|
||||
.cond.fail = GR_IS_UCODE_OP_SKIP,
|
||||
.mailbox.fail = 0});
|
||||
.mailbox.fail = 0}, false);
|
||||
}
|
||||
|
||||
int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
|
||||
@@ -5816,7 +5824,7 @@ int gr_gk20a_fecs_set_reglist_bind_inst(struct gk20a *g, phys_addr_t addr)
|
||||
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
||||
.mailbox.ok = 1,
|
||||
.cond.fail = GR_IS_UCODE_OP_SKIP,
|
||||
.mailbox.fail = 0});
|
||||
.mailbox.fail = 0}, false);
|
||||
}
|
||||
|
||||
int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
|
||||
@@ -5832,7 +5840,7 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va)
|
||||
.cond.ok = GR_IS_UCODE_OP_EQUAL,
|
||||
.mailbox.ok = 1,
|
||||
.cond.fail = GR_IS_UCODE_OP_SKIP,
|
||||
.mailbox.fail = 0});
|
||||
.mailbox.fail = 0}, false);
|
||||
}
|
||||
|
||||
int gk20a_gr_suspend(struct gk20a *g)
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#define GR_IDLE_CHECK_DEFAULT 100 /* usec */
|
||||
#define GR_IDLE_CHECK_MAX 5000 /* usec */
|
||||
#define GR_FECS_POLL_INTERVAL 5 /* usec */
|
||||
|
||||
#define INVALID_SCREEN_TILE_ROW_OFFSET 0xFFFFFFFF
|
||||
#define INVALID_MAX_WAYS 0xFFFFFFFF
|
||||
@@ -491,7 +492,8 @@ int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
|
||||
u32 expect_delay);
|
||||
int gr_gk20a_init_ctx_state(struct gk20a *g);
|
||||
int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
|
||||
struct fecs_method_op_gk20a op);
|
||||
struct fecs_method_op_gk20a op,
|
||||
bool sleepduringwait);
|
||||
int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
|
||||
struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
|
||||
u32 class, u32 padding);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* GM20B GPC MMU
|
||||
*
|
||||
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -672,7 +672,7 @@ static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g)
|
||||
static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
|
||||
{
|
||||
unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
|
||||
unsigned long delay = GR_IDLE_CHECK_DEFAULT;
|
||||
unsigned long delay = GR_FECS_POLL_INTERVAL;
|
||||
u32 reg;
|
||||
|
||||
gk20a_dbg_fn("");
|
||||
@@ -681,8 +681,7 @@ static int gr_gm20b_ctx_wait_lsf_ready(struct gk20a *g, u32 timeout, u32 val)
|
||||
reg = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(0));
|
||||
if (reg == val)
|
||||
return 0;
|
||||
usleep_range(delay, delay * 2);
|
||||
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
||||
udelay(delay);
|
||||
} while (time_before(jiffies, end_jiffies) ||
|
||||
!tegra_platform_is_silicon());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user