gpu: nvgpu: Implement ELPG flush for gm20b

ELPG flush is initiated from a common broadcast register, but must be
waited on via per-L2 registers. Split gk20a and gm20b versions of
the flush.

Change-Id: I75c2d65e8da311b50d35bee70308b60464ec2d4d
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/401545
Reviewed-by: Automatic_Commit_Validation_User
Author: Terje Bergstrom
Date: 2014-04-25 15:00:54 +03:00
Committed by: Dan Willemsen
Parent: 24fc5e36a7
Commit: 1c9aaa1eaf

4 changed files with 97 additions and 37 deletions
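
In short: the flush is still kicked off by a single write to the broadcast LTCS register, but completion is now polled per LTC unit, stepping from the LTC0 register by the LTC0-to-LTC1 stride. A minimal, self-contained model of that wait loop is sketched below; the register file is simulated, and LTC_COUNT, read_elpg_status() and wait_elpg_flush_done() are illustrative names, not nvgpu APIs.

/*
 * Standalone model of the per-L2 ELPG flush wait introduced for gm20b.
 * Hardware access is simulated; in the driver, the per-unit status register
 * for LTC i is read at ltc_ltc0_ltss_g_elpg_r() + stride * i, where
 * stride = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r().
 */
#include <stdbool.h>
#include <stdio.h>

#define LTC_COUNT 2		/* hypothetical number of L2 (LTC) units */

/* Simulated per-LTC flush state: >0 means "still pending" for that many polls. */
static unsigned int pending_polls[LTC_COUNT];

/* Stand-in for reading the per-unit G_ELPG register; bit 0 = flush pending. */
static unsigned int read_elpg_status(int ltc)
{
	if (pending_polls[ltc])
		pending_polls[ltc]--;
	return pending_polls[ltc] ? 0x1u : 0x0u;
}

/* Poll every LTC unit until each one reports its flush as complete. */
static bool wait_elpg_flush_done(void)
{
	bool done[LTC_COUNT] = { false };
	int num_done = 0;
	int retry = 100;

	do {
		for (int i = 0; i < LTC_COUNT; i++) {
			if (done[i])
				continue;
			if (read_elpg_status(i) & 0x1) {
				printf("LTC%d: flush still pending\n", i);
			} else {
				done[i] = true;
				num_done++;
			}
		}
		if (num_done < LTC_COUNT)
			retry--;	/* the driver sleeps here: usleep_range(20, 40) */
		else
			break;
	} while (retry >= 0);

	return retry >= 0;
}

int main(void)
{
	/* Pretend a single broadcast write has started the flush on every unit;
	 * unit i takes i + 1 polls to finish. */
	for (int i = 0; i < LTC_COUNT; i++)
		pending_polls[i] = (unsigned int)(i + 1);

	printf("ELPG flush %s\n", wait_elpg_flush_done() ? "complete" : "timed out");
	return 0;
}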


@@ -313,37 +313,3 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
 			0, max_comptag_lines - 1);
 }
 
-/* Flushes the compression bit cache as well as "data".
- * Note: the name here is a bit of a misnomer. ELPG uses this
- * internally... but ELPG doesn't have to be on to do it manually.
- */
-static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
-{
-	u32 data;
-	s32 retry = 100;
-
-	gk20a_dbg_fn("");
-
-	/* Make sure all previous writes are committed to the L2. There's no
-	   guarantee that writes are to DRAM. This will be a sysmembar internal
-	   to the L2. */
-	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
-		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
-
-	do {
-		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
-
-		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
-		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
-			gk20a_dbg_info("g_elpg_flush 0x%x", data);
-			retry--;
-			usleep_range(20, 40);
-		} else
-			break;
-	} while (retry >= 0 || !tegra_platform_is_silicon());
-
-	if (retry < 0)
-		gk20a_warn(dev_from_gk20a(g),
-			   "g_elpg_flush too many retries");
-}


@@ -212,6 +212,40 @@ void gk20a_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+/* Flushes the compression bit cache as well as "data".
+ * Note: the name here is a bit of a misnomer. ELPG uses this
+ * internally... but ELPG doesn't have to be on to do it manually.
+ */
+static void gk20a_mm_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	s32 retry = 100;
+
+	gk20a_dbg_fn("");
+
+	/* Make sure all previous writes are committed to the L2. There's no
+	   guarantee that writes are to DRAM. This will be a sysmembar internal
+	   to the L2. */
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+
+	do {
+		data = gk20a_readl(g, ltc_ltc0_ltss_g_elpg_r());
+
+		if (ltc_ltc0_ltss_g_elpg_flush_v(data) ==
+		    ltc_ltc0_ltss_g_elpg_flush_pending_v()) {
+			gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			   "g_elpg_flush too many retries");
+}
+
 void gk20a_init_ltc(struct gpu_ops *gops)
 {
 	gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes;


@@ -96,11 +96,11 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void)
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void)
 {
-	return 0x1;
+	return 0x1;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void)
 {
-	return 0x2;
+	return 0x2;
 }
 static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r)
 {
{
@@ -258,6 +258,22 @@ static inline u32 ltc_ltc0_ltss_g_elpg_flush_pending_f(void)
 {
 	return 0x1;
 }
+static inline u32 ltc_ltc1_ltss_g_elpg_r(void)
+{
+	return 0x00142214;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_v(void)
+{
+	return 0x00000001;
+}
+static inline u32 ltc_ltc1_ltss_g_elpg_flush_pending_f(void)
+{
+	return 0x1;
+}
 static inline u32 ltc_ltc0_ltss_intr_r(void)
 {
 	return 0x0014020c;


@@ -193,6 +193,50 @@ void gm20b_ltc_isr(struct gk20a *g)
 	gk20a_writel(g, ltc_ltc0_ltss_intr_r(), intr);
 }
 
+static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+{
+	u32 data;
+	bool done[g->ltc_count];
+	s32 retry = 100;
+	int i;
+	int num_done = 0;
+	u32 ltc_d = ltc_ltc1_ltss_g_elpg_r() - ltc_ltc0_ltss_g_elpg_r();
+
+	gk20a_dbg_fn("");
+
+	for (i = 0; i < g->ltc_count; i++)
+		done[i] = 0;
+
+	gk20a_writel(g, ltc_ltcs_ltss_g_elpg_r(),
+		     ltc_ltcs_ltss_g_elpg_flush_pending_f());
+	do {
+		for (i = 0; i < g->ltc_count; i++) {
+			if (done[i])
+				continue;
+
+			data = gk20a_readl(g,
+					ltc_ltc0_ltss_g_elpg_r() + ltc_d * i);
+
+			if (ltc_ltc0_ltss_g_elpg_flush_v(data)) {
+				gk20a_dbg_info("g_elpg_flush 0x%x", data);
+			} else {
+				done[i] = 1;
+				num_done++;
+			}
+		}
+
+		if (num_done < g->ltc_count) {
+			retry--;
+			usleep_range(20, 40);
+		} else
+			break;
+	} while (retry >= 0 || !tegra_platform_is_silicon());
+
+	if (retry < 0)
+		gk20a_warn(dev_from_gk20a(g),
+			   "g_elpg_flush too many retries");
+}
+
 void gm20b_init_ltc(struct gpu_ops *gops)
 {
 	/* Gk20a reused ops. */
@@ -209,6 +253,6 @@ void gm20b_init_ltc(struct gpu_ops *gops)
 	gops->ltc.init_fs_state = gm20b_ltc_init_fs_state;
 	gops->ltc.init_comptags = gm20b_ltc_init_comptags;
 	gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl;
-	gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked;
+	gops->ltc.elpg_flush = gm20b_ltc_g_elpg_flush_locked;
 	gops->ltc.isr = gm20b_ltc_isr;
 }
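
With this change the flush is always reached through the per-chip function table, so gm20b callers pick up the new implementation without any call-site changes. Assuming the usual nvgpu layout where struct gk20a embeds the ops table as g->ops, a call site would look roughly like this:

	/* Dispatch through the per-chip ops table; gk20a and gm20b each
	 * install their own elpg_flush handler above. */
	g->ops.ltc.elpg_flush(g);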