diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index f19182379..cd8aac8f3 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -147,7 +147,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, - .wait_empty = NULL, .init_cyclestats = vgpu_gr_init_cyclestats, .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, .bpt_reg_info = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 2131aade5..6a23bf7e6 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -165,7 +165,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, .get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask, .init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info, - .wait_empty = NULL, .init_cyclestats = vgpu_gr_init_cyclestats, .set_sm_debug_mode = vgpu_gr_set_sm_debug_mode, .bpt_reg_info = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 0befa4743..0cfdb1833 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -4171,7 +4171,7 @@ int gk20a_gr_suspend(struct gk20a *g) nvgpu_log_fn(g, " "); - ret = g->ops.gr.wait_empty(g); + ret = g->ops.gr.init.wait_empty(g); if (ret != 0) { return ret; } diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index f9ad120f0..856160c2a 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -270,7 +270,6 @@ static const struct gpu_ops gm20b_ops = { .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, 
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, - .wait_empty = gm20b_gr_init_wait_idle, .init_cyclestats = gr_gm20b_init_cyclestats, .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, .bpt_reg_info = gr_gm20b_bpt_reg_info, @@ -425,6 +424,7 @@ static const struct gpu_ops gm20b_ops = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .wait_empty = gm20b_gr_init_wait_idle, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, .fe_pwr_mode_force_on = diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index aea59aaca..61bd79374 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -1207,67 +1207,6 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g, return 0; } -static bool gr_activity_empty_or_preempted(u32 val) -{ - while(val != 0U) { - u32 v = val & 7U; - if (v != gr_activity_4_gpc0_empty_v() && - v != gr_activity_4_gpc0_preempted_v()) { - return false; - } - val >>= 3; - } - - return true; -} - -int gr_gp10b_wait_empty(struct gk20a *g) -{ - u32 delay = GR_IDLE_CHECK_DEFAULT; - bool ctxsw_active; - bool gr_busy; - u32 gr_status; - u32 activity0, activity1, activity2, activity4; - struct nvgpu_timeout timeout; - - nvgpu_log_fn(g, " "); - - nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), - NVGPU_TIMER_CPU_TIMER); - - do { - /* fmodel: host gets fifo_engine_status(gr) from gr - only when gr_status is read */ - gr_status = gk20a_readl(g, gr_status_r()); - - ctxsw_active = (gr_status & BIT32(7)) != 0U; - - activity0 = gk20a_readl(g, gr_activity_0_r()); - activity1 = gk20a_readl(g, gr_activity_1_r()); - activity2 = gk20a_readl(g, gr_activity_2_r()); - activity4 = gk20a_readl(g, gr_activity_4_r()); - - gr_busy = !(gr_activity_empty_or_preempted(activity0) && - gr_activity_empty_or_preempted(activity1) && - activity2 == 0U && - 
gr_activity_empty_or_preempted(activity4)); - - if (!gr_busy && !ctxsw_active) { - nvgpu_log_fn(g, "done"); - return 0; - } - - nvgpu_usleep_range(delay, delay * 2U); - delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - } while (nvgpu_timeout_expired(&timeout) == 0); - - nvgpu_err(g, - "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", - ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); - - return -EAGAIN; -} - void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch) diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index 2a19f737f..385ea0194 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -97,7 +97,6 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx); int gr_gp10b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o); -int gr_gp10b_wait_empty(struct gk20a *g); void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index cb5db925b..983f75784 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -292,7 +292,6 @@ static const struct gpu_ops gp10b_ops = { .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, .init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info, - .wait_empty = gr_gp10b_wait_empty, .init_cyclestats = gr_gm20b_init_cyclestats, .set_sm_debug_mode = gr_gk20a_set_sm_debug_mode, .bpt_reg_info = gr_gm20b_bpt_reg_info, @@ -495,6 +494,7 @@ static const struct gpu_ops gp10b_ops = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = 
gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, .fe_pwr_mode_force_on = diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 1df8cfce2..5cfa58bf7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -397,7 +397,6 @@ static const struct gpu_ops gv100_ops = { .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, .init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info, - .wait_empty = gr_gv11b_wait_empty, .init_cyclestats = gr_gm20b_init_cyclestats, .set_sm_debug_mode = gv11b_gr_set_sm_debug_mode, .bpt_reg_info = gv11b_gr_bpt_reg_info, @@ -632,6 +631,7 @@ static const struct gpu_ops gv100_ops = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, .fe_pwr_mode_force_on = diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 52fafc40b..ae27a5e3d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1825,73 +1825,6 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g, return 0; } -static bool gr_activity_empty_or_preempted(u32 val) -{ - while (val != 0U) { - u32 v = val & 7U; - if (v != gr_activity_4_gpc0_empty_v() && - v != gr_activity_4_gpc0_preempted_v()) { - return false; - } - val >>= 3; - } - - return true; -} - -int gr_gv11b_wait_empty(struct gk20a *g) -{ - u32 delay = GR_IDLE_CHECK_DEFAULT; - bool ctxsw_active; - bool gr_busy; - u32 gr_status; - u32 activity0, activity1, activity2, activity4; - struct nvgpu_timeout timeout; - int err; - - nvgpu_log_fn(g, " "); - - err = nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), - NVGPU_TIMER_CPU_TIMER); - if (err != 0) { - nvgpu_err(g, "timeout_init failed: %d", err); - return 
err; - } - - do { - /* fmodel: host gets fifo_engine_status(gr) from gr - only when gr_status is read */ - gr_status = gk20a_readl(g, gr_status_r()); - - ctxsw_active = (gr_status & BIT32(7)) != 0U; - - activity0 = gk20a_readl(g, gr_activity_0_r()); - activity1 = gk20a_readl(g, gr_activity_1_r()); - activity2 = gk20a_readl(g, gr_activity_2_r()); - activity4 = gk20a_readl(g, gr_activity_4_r()); - - gr_busy = !(gr_activity_empty_or_preempted(activity0) && - gr_activity_empty_or_preempted(activity1) && - activity2 == 0U && - gr_activity_empty_or_preempted(activity4)); - - if (!gr_busy && !ctxsw_active) { - nvgpu_log_fn(g, "done"); - return 0; - } - - nvgpu_usleep_range(delay, delay * 2U); - delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX); - - } while (nvgpu_timeout_expired(&timeout) == 0); - - nvgpu_err(g, - "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", - ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); - - return -EAGAIN; -} - void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch) diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index 80113c340..f42617ca8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -106,7 +106,6 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data); int gr_gv11b_dump_gr_status_regs(struct gk20a *g, struct gk20a_debug_output *o); -int gr_gv11b_wait_empty(struct gk20a *g); void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, u64 addr, bool patch); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index ae417c971..163fb1974 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -349,7 +349,6 @@ static const struct gpu_ops gv11b_ops = { .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, 
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, .init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info, - .wait_empty = gr_gv11b_wait_empty, .init_cyclestats = gr_gm20b_init_cyclestats, .set_sm_debug_mode = gv11b_gr_set_sm_debug_mode, .bpt_reg_info = gv11b_gr_bpt_reg_info, @@ -591,6 +590,7 @@ static const struct gpu_ops gv11b_ops = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, .fe_pwr_mode_force_on = diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index 78d87b0b9..adcac7d38 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -445,7 +445,7 @@ int gm20b_gr_init_wait_fe_idle(struct gk20a *g) nvgpu_log_fn(g, " "); err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g), - NVGPU_TIMER_CPU_TIMER); + NVGPU_TIMER_CPU_TIMER); if (err != 0) { return err; } diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c index cebb0d1a8..3b364f6d8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.c @@ -24,6 +24,7 @@ #include #include +#include #include "gr_init_gm20b.h" #include "gr_init_gp10b.h" @@ -79,6 +80,73 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, return 0; } +static bool gr_activity_empty_or_preempted(u32 val) +{ /* Treats val as packed 3-bit fields, lowest bits first: returns false at the first field that equals neither gr_activity_4_gpc0_empty_v() nor gr_activity_4_gpc0_preempted_v(), and true otherwise (including val == 0). Scanning stops once the remaining bits are all zero. */ + while (val != 0U) { + u32 v = val & 7U; /* current (lowest) 3-bit field */ + + if (v != gr_activity_4_gpc0_empty_v() && + v != gr_activity_4_gpc0_preempted_v()) { + return false; + } + val >>= 3; /* move on to the next 3-bit field */ + } + + return true; +} + +int gp10b_gr_init_wait_empty(struct gk20a *g) +{ /* Polls gr_status and gr_activity_0/1/2/4 until bit 7 of gr_status is clear, activity 0, 1 and 4 hold only empty/preempted fields, and activity 2 reads zero. Returns 0 once that holds, the nvgpu_timeout_init() error if the timeout cannot be set up, or -EAGAIN when the timeout expires first. */ + u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US; + bool ctxsw_active; + bool gr_busy; + u32 gr_status; + u32 activity0, activity1, activity2, 
activity4; + struct nvgpu_timeout timeout; + int err; + + nvgpu_log_fn(g, " "); + + err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g), + NVGPU_TIMER_CPU_TIMER); + if (err != 0) { + nvgpu_err(g, "timeout_init failed: %d", err); + return err; + } + + do { + /* fmodel: host gets fifo_engine_status(gr) from gr + only when gr_status is read */ + gr_status = nvgpu_readl(g, gr_status_r()); + + ctxsw_active = (gr_status & BIT32(7)) != 0U; /* bit 7 of gr_status drives ctxsw_active */ + + activity0 = nvgpu_readl(g, gr_activity_0_r()); + activity1 = nvgpu_readl(g, gr_activity_1_r()); + activity2 = nvgpu_readl(g, gr_activity_2_r()); + activity4 = nvgpu_readl(g, gr_activity_4_r()); + + gr_busy = !(gr_activity_empty_or_preempted(activity0) && + gr_activity_empty_or_preempted(activity1) && + activity2 == 0U && + gr_activity_empty_or_preempted(activity4)); /* activity 2 is compared against zero directly rather than decoded per field */ + + if (!gr_busy && !ctxsw_active) { + nvgpu_log_fn(g, "done"); + return 0; + } + + nvgpu_usleep_range(delay, delay * 2U); + delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US); /* double the sleep interval for the next poll, capped at NVGPU_GR_IDLE_CHECK_MAX_US */ + } while (nvgpu_timeout_expired(&timeout) == 0); + + nvgpu_err(g, + "timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x", + ctxsw_active, gr_busy, activity0, activity1, activity2, activity4); /* logs the values from the last poll */ + + return -EAGAIN; /* timeout expired while the last poll still saw gr busy or ctxsw active */ +} + int gp10b_gr_init_fs_state(struct gk20a *g) { u32 data; diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h index 5422f762b..9ceac20c8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gp10b.h @@ -30,6 +30,7 @@ struct gk20a; u32 gp10b_gr_init_get_sm_id_size(void); int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, struct nvgpu_gr_config *gr_config); +int gp10b_gr_init_wait_empty(struct gk20a *g); int gp10b_gr_init_fs_state(struct gk20a *g); int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count, bool gfxp_wfi_timeout_unit_usec); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h 
b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index cc0cbfb45..0d2e7bc89 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -354,7 +354,6 @@ struct gpu_ops { u32* (*get_rop_l2_en_mask)(struct gk20a *g); void (*init_sm_dsm_reg_info)(void); void (*init_ovr_sm_dsm_perf)(void); - int (*wait_empty)(struct gk20a *g); void (*init_cyclestats)(struct gk20a *g); int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch, u64 sms, bool enable); @@ -678,6 +677,7 @@ struct gpu_ops { void (*cwd_gpcs_tpcs_num)(struct gk20a *g, u32 gpc_count, u32 tpc_count); + int (*wait_empty)(struct gk20a *g); int (*wait_idle)(struct gk20a *g); int (*wait_fe_idle)(struct gk20a *g); int (*fe_pwr_mode_force_on)(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 92204598c..639c48721 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -417,7 +417,6 @@ static const struct gpu_ops tu104_ops = { .get_fbp_en_mask = gr_gm20b_get_fbp_en_mask, .get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask, .init_sm_dsm_reg_info = gr_tu104_init_sm_dsm_reg_info, - .wait_empty = gr_gv11b_wait_empty, .init_cyclestats = gr_gm20b_init_cyclestats, .set_sm_debug_mode = gv11b_gr_set_sm_debug_mode, .bpt_reg_info = gv11b_gr_bpt_reg_info, @@ -660,6 +659,7 @@ static const struct gpu_ops tu104_ops = { .pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc, .pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc, .cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num, + .wait_empty = gp10b_gr_init_wait_empty, .wait_idle = gm20b_gr_init_wait_idle, .wait_fe_idle = gm20b_gr_init_wait_fe_idle, .fe_pwr_mode_force_on =