diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h index 38a3121f0..28c58f50f 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h @@ -94,6 +94,14 @@ static inline u32 ltc_ltcs_ltss_cbc_ctrl1_r(void) { return 0x0017e26c; } +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(void) +{ + return 0x1; +} +static inline u32 ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(void) +{ + return 0x2; +} static inline u32 ltc_ltcs_ltss_cbc_ctrl1_clear_v(u32 r) { return (r >> 2) & 0x1; diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 134ac491b..305e28c2f 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -103,10 +103,12 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) return 0; } -static int gm20b_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) +static int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) { + int err = 0; struct gr_gk20a *gr = &g->gr; - u32 fbp, slice, ctrl1, val; + u32 fbp, slice, ctrl1, val, hw_op = 0; unsigned long end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); u32 delay = GR_IDLE_CHECK_DEFAULT; @@ -118,13 +120,23 @@ static int gm20b_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) if (gr->compbit_store.size == 0) return 0; - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); + mutex_lock(&g->mm.l2_op_lock); + + if (op == gk20a_cbc_op_clear) { + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + } else if (op == gk20a_cbc_op_clean) { + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + BUG_ON(1); + } gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | - ltc_ltcs_ltss_cbc_ctrl1_clear_active_f()); + gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) || hw_op); for (fbp = 0; fbp < gr->num_fbps; fbp++) { for (slice = 0; slice < slices_per_ltc; slice++) { @@ -137,8 +149,7 @@ static int gm20b_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) do { val = gk20a_readl(g, ctrl1); - if (ltc_ltcs_ltss_cbc_ctrl1_clear_v(val) != - ltc_ltcs_ltss_cbc_ctrl1_clear_active_v()) + if (!(val & hw_op)) break; usleep_range(delay, delay * 2); @@ -151,11 +162,13 @@ static int gm20b_ltc_clear_comptags(struct gk20a *g, u32 min, u32 max) if (!time_before(jiffies, end_jiffies)) { gk20a_err(dev_from_gk20a(g), "comp tag clear timeout\n"); - return -EBUSY; + err = -EBUSY; + goto out; } } } - +out: + mutex_unlock(&g->mm.l2_op_lock); return 0; } @@ -192,6 +205,6 @@ void gm20b_init_ltc(struct gpu_ops *gops) /* GM20b specific ops. */ gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; gops->ltc.init_comptags = gm20b_ltc_init_comptags; - gops->ltc.clear_comptags = gm20b_ltc_clear_comptags; + gops->ltc.cbc_ctrl = gm20b_ltc_cbc_ctrl; gops->ltc.elpg_flush = gk20a_mm_g_elpg_flush_locked; }