From caac47c4fab39600a46e0efbd699ea8fadeabeb5 Mon Sep 17 00:00:00 2001
From: Vinod G
Date: Wed, 13 Mar 2019 13:15:19 -0700
Subject: [PATCH] gpu: nvgpu: add new gr.init hals

Create new HALs for wait_idle and wait_fe_idle under gr.init.
Move the functions to the following HALs and use the same HALs
for all chips:

gr_gk20a_wait_idle -> gm20b_gr_init_wait_idle
gr_gk20a_wait_fe_idle -> gm20b_gr_init_wait_fe_idle

JIRA NVGPU-2951

Change-Id: Ie60675a08cba12e31557711b6f05f06879de8965
Signed-off-by: Vinod G
Reviewed-on: https://git-master.nvidia.com/r/2072051
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            | 104 ++----------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h            |   3 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |   4 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |   2 +
 drivers/gpu/nvgpu/gv100/hal_gv100.c           |   2 +
 drivers/gpu/nvgpu/gv11b/gr_gv11b.c            |   5 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c           |   2 +
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c |  93 ++++++++++++++++
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h |   2 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |   2 +
 drivers/gpu/nvgpu/tu104/gr_tu104.c            |   5 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c           |   2 +
 12 files changed, 124 insertions(+), 102 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 8747743e6..a92152070 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -225,90 +225,6 @@ static void gr_gk20a_load_falcon_imem(struct gk20a *g)
 	}
 }
 
-int gr_gk20a_wait_idle(struct gk20a *g)
-{
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	bool ctxsw_active;
-	bool gr_busy;
-	u32 gr_engine_id;
-
-	struct nvgpu_engine_status_info engine_status;
-	bool ctx_status_invalid;
-	struct nvgpu_timeout timeout;
-
-	nvgpu_log_fn(g, " ");
-
-	gr_engine_id = nvgpu_engine_get_gr_id(g);
-
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
-			   NVGPU_TIMER_CPU_TIMER);
-
-	do {
-		/* fmodel: host gets fifo_engine_status(gr) from gr
-		   only when gr_status is read */
-		(void) gk20a_readl(g, gr_status_r());
-
-		g->ops.engine_status.read_engine_status_info(g, gr_engine_id,
-			&engine_status);
-
-		ctxsw_active = engine_status.ctxsw_in_progress;
-
-		ctx_status_invalid = nvgpu_engine_status_is_ctxsw_invalid(
-			&engine_status);
-
-		gr_busy = (gk20a_readl(g, gr_engine_status_r()) &
-			gr_engine_status_value_busy_f()) != 0U;
-
-		if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
-			nvgpu_log_fn(g, "done");
-			return 0;
-		}
-
-		nvgpu_usleep_range(delay, delay * 2U);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	nvgpu_err(g,
-		"timeout, ctxsw busy : %d, gr busy : %d",
-		ctxsw_active, gr_busy);
-
-	return -EAGAIN;
-}
-
-int gr_gk20a_wait_fe_idle(struct gk20a *g)
-{
-	u32 val;
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	struct nvgpu_timeout timeout;
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		return 0;
-	}
-
-	nvgpu_log_fn(g, " ");
-
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
-			   NVGPU_TIMER_CPU_TIMER);
-
-	do {
-		val = gk20a_readl(g, gr_status_r());
-
-		if (gr_status_fe_method_lower_v(val) == 0U) {
-			nvgpu_log_fn(g, "done");
-			return 0;
-		}
-
-		nvgpu_usleep_range(delay, delay * 2U);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	nvgpu_err(g,
-		"timeout, fe busy : %x", val);
-
-	return -EAGAIN;
-}
-
 int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
 			    u32 *mailbox_ret, u32 opc_success,
 			    u32 mailbox_ok, u32 opc_fail,
@@ -1150,13 +1066,13 @@ int gk20a_init_sw_bundle(struct gk20a *g)
 		if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
 		    GR_GO_IDLE_BUNDLE) {
-			err = gr_gk20a_wait_idle(g);
+			err = g->ops.gr.init.wait_idle(g);
 			if (err != 0) {
 				goto error;
 			}
 		}
 
-		err = gr_gk20a_wait_fe_idle(g);
+		err = g->ops.gr.init.wait_fe_idle(g);
 		if (err != 0) {
 			goto error;
 		}
 
@@ -1180,7 +1096,7 @@ int gk20a_init_sw_bundle(struct gk20a *g)
 	gk20a_writel(g, gr_pipe_bundle_config_r(),
 		gr_pipe_bundle_config_override_pipe_mode_disabled_f());
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 
 	/* restore fe_go_idle */
 	gk20a_writel(g, gr_fe_go_idle_timeout_r(),
@@ -1248,7 +1164,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 
 	/* load ctx init */
 	for (i = 0; i < sw_ctx_load->count; i++) {
@@ -1262,7 +1178,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 
 	nvgpu_cg_blcg_gr_load_enable(g);
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -1285,7 +1201,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		goto clean_up;
 	}
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 	if (err != 0) {
 		goto restore_fe_go_idle;
 	}
@@ -1300,7 +1216,7 @@ restore_fe_go_idle:
 	gk20a_writel(g, gr_fe_go_idle_timeout_r(),
 		gr_fe_go_idle_timeout_count_prod_f());
 
-	if ((err != 0) || (gr_gk20a_wait_idle(g) != 0)) {
+	if ((err != 0) || (g->ops.gr.init.wait_idle(g) != 0)) {
 		goto clean_up;
 	}
 
@@ -1324,7 +1240,7 @@ restore_fe_go_idle:
 			sw_method_init->l[i].addr);
 	}
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -2811,7 +2727,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		goto out;
 	}
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 out:
 	nvgpu_log_fn(g, "done");
 	return err;
@@ -2915,7 +2831,7 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
 		goto out;
 	}
 
-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
 	if (err != 0) {
 		goto out;
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 6694b0456..e9ab6bdee 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -457,7 +457,6 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
 int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
 		struct channel_gk20a *ch, u64 sms, bool enable);
 bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
-int gr_gk20a_wait_idle(struct gk20a *g);
 int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 		bool *post_event, struct channel_gk20a *fault_ch,
 		u32 *hww_global_esr);
@@ -513,8 +512,6 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
 u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
 u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
 
-int gr_gk20a_wait_fe_idle(struct gk20a *g);
-
 struct dbg_session_gk20a;
 
 bool gr_gk20a_suspend_context(struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 119193c91..94757c057 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -272,7 +272,7 @@ static const struct gpu_ops gm20b_ops = {
 		.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
 		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
-		.wait_empty = gr_gk20a_wait_idle,
+		.wait_empty = gm20b_gr_init_wait_idle,
 		.init_cyclestats = gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
@@ -428,6 +428,8 @@ static const struct gpu_ops gm20b_ops = {
 				gm20b_gr_init_fe_pwr_mode_force_on,
 			.override_context_reset =
 				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
 		},
 	},
 	.fb = {
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index feb5868de..3cec9c06d 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -477,6 +477,8 @@ static const struct gpu_ops gp10b_ops = {
 				gm20b_gr_init_fe_pwr_mode_force_on,
 			.override_context_reset =
 				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
 		},
 	},
 	.fb = {
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 1006a7f01..29203a5d6 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -613,6 +613,8 @@ static const struct gpu_ops gv100_ops = {
 				gm20b_gr_init_fe_pwr_mode_force_on,
 			.override_context_reset =
 				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
 		},
 	},
 	.fb = {
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d701bde67..47ae65e18 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -2556,7 +2557,7 @@ static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
 			sw_veid_bundle_init->l[index].addr |
 			gr_pipe_bundle_address_veid_f(j));
 
-		err = gr_gk20a_wait_fe_idle(g);
+		err = g->ops.gr.init.wait_idle(g);
 	}
 	return err;
 }
@@ -2586,7 +2587,7 @@ int gr_gv11b_init_sw_veid_bundle(struct gk20a *g)
 			nvgpu_log_fn(g, "go idle bundle");
 				gk20a_writel(g, gr_pipe_bundle_address_r(),
 					sw_veid_bundle_init->l[i].addr);
-				err = gr_gk20a_wait_idle(g);
+				err = g->ops.gr.init.wait_idle(g);
 		} else {
 			err = gv11b_write_bundle_veid_state(g, i);
 		}
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 067416077..252e70435 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -570,6 +570,8 @@ static const struct gpu_ops gv11b_ops = {
 				gm20b_gr_init_fe_pwr_mode_force_on,
 			.override_context_reset =
 				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
 		},
 	},
 	.fb = {
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
index deda96242..04a1a3ad3 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c
@@ -24,6 +24,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "gr_init_gm20b.h"
 
@@ -33,6 +35,97 @@
 #define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
 #define FECS_CTXSW_RESET_DELAY_US 10U
 
+int gm20b_gr_init_wait_idle(struct gk20a *g)
+{
+	u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
+	u32 gr_engine_id;
+	int err = -EAGAIN;
+	bool ctxsw_active;
+	bool gr_busy;
+	bool ctx_status_invalid;
+	struct nvgpu_engine_status_info engine_status;
+	struct nvgpu_timeout timeout;
+
+	nvgpu_log_fn(g, " ");
+
+	gr_engine_id = nvgpu_engine_get_gr_id(g);
+
+	err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
+				 NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		return err;
+	}
+
+	do {
+		/*
+		 * fmodel: host gets fifo_engine_status(gr) from gr
+		 * only when gr_status is read
+		 */
+		(void) nvgpu_readl(g, gr_status_r());
+
+		g->ops.engine_status.read_engine_status_info(g, gr_engine_id,
+			&engine_status);
+
+		ctxsw_active = engine_status.ctxsw_in_progress;
+
+		ctx_status_invalid = nvgpu_engine_status_is_ctxsw_invalid(
+			&engine_status);
+
+		gr_busy = (nvgpu_readl(g, gr_engine_status_r()) &
+			gr_engine_status_value_busy_f()) != 0U;
+
+		if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
+			nvgpu_log_fn(g, "done");
+			return 0;
+		}
+
+		nvgpu_usleep_range(delay, delay * 2U);
+		delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
+
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	nvgpu_err(g, "timeout, ctxsw busy : %d, gr busy : %d",
+		ctxsw_active, gr_busy);
+
+	return err;
+}
+
+int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
+{
+	u32 val;
+	u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
+	struct nvgpu_timeout timeout;
+	int err = -EAGAIN;
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
+		return 0;
+	}
+
+	nvgpu_log_fn(g, " ");
+
+	err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
+				 NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		return err;
+	}
+
+	do {
+		val = nvgpu_readl(g, gr_status_r());
+
+		if (gr_status_fe_method_lower_v(val) == 0U) {
+			nvgpu_log_fn(g, "done");
+			return 0;
+		}
+
+		nvgpu_usleep_range(delay, delay * 2U);
+		delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	nvgpu_err(g, "timeout, fe busy : %x", val);
+
+	return err;
+}
+
 int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on)
 {
 	struct nvgpu_timeout timeout;
diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
index 4ce5e39da..2926b9c0c 100644
--- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
+++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h
@@ -27,6 +27,8 @@
 
 struct gk20a;
 
+int gm20b_gr_init_wait_idle(struct gk20a *g);
+int gm20b_gr_init_wait_fe_idle(struct gk20a *g);
 int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on);
 void gm20b_gr_init_override_context_reset(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 5258c6405..d2c636d21 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -636,6 +636,8 @@ struct gpu_ops {
 		} hwpm_map;
 
 		struct {
+			int (*wait_idle)(struct gk20a *g);
+			int (*wait_fe_idle)(struct gk20a *g);
 			int (*fe_pwr_mode_force_on)(struct gk20a *g,
 				bool force_on);
 			void (*override_context_reset)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c
index 9de1bccb9..0b86d9ec3 100644
--- a/drivers/gpu/nvgpu/tu104/gr_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/gr_pri_gk20a.h"
@@ -111,9 +112,9 @@ int gr_tu104_init_sw_bundle64(struct gk20a *g)
 
 		if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
 				== GR_GO_IDLE_BUNDLE) {
-			err = gr_gk20a_wait_idle(g);
+			err = g->ops.gr.init.wait_idle(g);
 		} else if (nvgpu_platform_is_silicon(g)) {
-			err = gr_gk20a_wait_fe_idle(g);
+			err = g->ops.gr.init.wait_fe_idle(g);
 		}
 		if (err != 0) {
 			break;
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 4d0dddfab..08d19ed3b 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -639,6 +639,8 @@ static const struct gpu_ops tu104_ops = {
 				gm20b_gr_init_fe_pwr_mode_force_on,
 			.override_context_reset =
 				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
 		},
 	},
 	.fb = {
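
For reference, below is a minimal standalone sketch of the HAL indirection this patch moves wait_idle/wait_fe_idle behind: each chip HAL file fills a table of function pointers once, and common code calls g->ops.gr.init.wait_idle(g) instead of calling gr_gk20a_wait_idle() directly. Only the wait_idle/wait_fe_idle signatures and the ops.gr.init nesting mirror the patch; the function names example_wait_idle/example_wait_fe_idle and the use of printf are illustrative assumptions, not part of the nvgpu tree.

/*
 * Illustrative sketch only -- not part of the patch or the nvgpu tree.
 * It mimics the gpu_ops.gr.init function-pointer layout added above so
 * the call pattern g->ops.gr.init.wait_idle(g) can be seen in isolation.
 */
#include <stdio.h>

struct gk20a;	/* forward declaration, as in the driver headers */

struct gpu_ops {
	struct {
		struct {
			int (*wait_idle)(struct gk20a *g);
			int (*wait_fe_idle)(struct gk20a *g);
		} init;
	} gr;
};

struct gk20a {
	struct gpu_ops ops;
};

/* Stand-ins for gm20b_gr_init_wait_idle()/gm20b_gr_init_wait_fe_idle(). */
static int example_wait_idle(struct gk20a *g)
{
	(void)g;
	printf("wait_idle reached through g->ops.gr.init\n");
	return 0;
}

static int example_wait_fe_idle(struct gk20a *g)
{
	(void)g;
	printf("wait_fe_idle reached through g->ops.gr.init\n");
	return 0;
}

int main(void)
{
	/* Analogous to the per-chip gpu_ops tables in hal_gm20b.c etc. */
	struct gk20a g = {
		.ops.gr.init = {
			.wait_idle = example_wait_idle,
			.wait_fe_idle = example_wait_fe_idle,
		},
	};

	/* Common code, e.g. gk20a_init_sw_bundle(), now calls through the HAL. */
	if (g.ops.gr.init.wait_idle(&g) != 0) {
		return 1;
	}
	return g.ops.gr.init.wait_fe_idle(&g);
}

Because every supported chip (gm20b, gp10b, gv100, gv11b, tu104) points both new ops at the gm20b implementations, behavior is unchanged while the per-chip table becomes the single place to override the wait policy later.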