gpu: nvgpu: add new gr.init hals

Create new HALs for wait_idle and wait_fe_idle under gr.init.

Convert the functions to the following HALs and use the same HALs for all chips:
gr_gk20a_wait_idle -> gm20b_gr_init_wait_idle
gr_gk20a_wait_fe_idle -> gm20b_gr_init_wait_fe_idle

JIRA NVGPU-2951

Change-Id: Ie60675a08cba12e31557711b6f05f06879de8965
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2072051
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit caac47c4fa (parent 79d332aca9)
12 changed files with 124 additions and 102 deletions
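
The change at every call site is mechanical: direct calls to gr_gk20a_wait_idle()/gr_gk20a_wait_fe_idle() become indirect calls through the per-chip gpu_ops table, and each chip's table points both hooks at the shared gm20b implementations. Below is a minimal standalone sketch of that indirection, with simplified hypothetical types; the real struct gpu_ops in gk20a.h is far larger:

    #include <stdio.h>

    struct gk20a;	/* forward declaration, as in the real headers */

    /* Hypothetical, trimmed-down gr.init HAL ops. */
    struct gr_init_ops {
    	int (*wait_idle)(struct gk20a *g);
    	int (*wait_fe_idle)(struct gk20a *g);
    };

    struct gpu_ops {
    	struct {
    		struct gr_init_ops init;
    	} gr;
    };

    struct gk20a {
    	struct gpu_ops ops;
    };

    /* Chip-level implementation; every chip in this change reuses the gm20b one. */
    static int gm20b_gr_init_wait_idle(struct gk20a *g)
    {
    	(void)g;
    	printf("gm20b wait_idle\n");
    	return 0;
    }

    int main(void)
    {
    	struct gk20a g = { 0 };

    	/* What each per-chip ops table (gm20b_ops, gp10b_ops, ...) now does: */
    	g.ops.gr.init.wait_idle = gm20b_gr_init_wait_idle;

    	/* What call sites do instead of calling gr_gk20a_wait_idle() directly: */
    	return g.ops.gr.init.wait_idle(&g);
    }

Since every supported chip assigns the same gm20b functions, behavior is unchanged; the indirection only moves the choice of implementation into the per-chip HAL table.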


@@ -225,90 +225,6 @@ static void gr_gk20a_load_falcon_imem(struct gk20a *g)
	}
}

-int gr_gk20a_wait_idle(struct gk20a *g)
-{
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	bool ctxsw_active;
-	bool gr_busy;
-	u32 gr_engine_id;
-	struct nvgpu_engine_status_info engine_status;
-	bool ctx_status_invalid;
-	struct nvgpu_timeout timeout;
-
-	nvgpu_log_fn(g, " ");
-
-	gr_engine_id = nvgpu_engine_get_gr_id(g);
-
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
-			NVGPU_TIMER_CPU_TIMER);
-
-	do {
-		/* fmodel: host gets fifo_engine_status(gr) from gr
-		   only when gr_status is read */
-		(void) gk20a_readl(g, gr_status_r());
-
-		g->ops.engine_status.read_engine_status_info(g, gr_engine_id,
-			&engine_status);
-		ctxsw_active = engine_status.ctxsw_in_progress;
-
-		ctx_status_invalid = nvgpu_engine_status_is_ctxsw_invalid(
-			&engine_status);
-
-		gr_busy = (gk20a_readl(g, gr_engine_status_r()) &
-			gr_engine_status_value_busy_f()) != 0U;
-
-		if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
-			nvgpu_log_fn(g, "done");
-			return 0;
-		}
-
-		nvgpu_usleep_range(delay, delay * 2U);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	nvgpu_err(g,
-		"timeout, ctxsw busy : %d, gr busy : %d",
-		ctxsw_active, gr_busy);
-
-	return -EAGAIN;
-}
-
-int gr_gk20a_wait_fe_idle(struct gk20a *g)
-{
-	u32 val;
-	u32 delay = GR_IDLE_CHECK_DEFAULT;
-	struct nvgpu_timeout timeout;
-
-	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
-		return 0;
-	}
-
-	nvgpu_log_fn(g, " ");
-
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
-			NVGPU_TIMER_CPU_TIMER);
-
-	do {
-		val = gk20a_readl(g, gr_status_r());
-
-		if (gr_status_fe_method_lower_v(val) == 0U) {
-			nvgpu_log_fn(g, "done");
-			return 0;
-		}
-
-		nvgpu_usleep_range(delay, delay * 2U);
-		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	nvgpu_err(g,
-		"timeout, fe busy : %x", val);
-
-	return -EAGAIN;
-}
-
int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
		u32 *mailbox_ret, u32 opc_success,
		u32 mailbox_ok, u32 opc_fail,
@@ -1150,13 +1066,13 @@ int gk20a_init_sw_bundle(struct gk20a *g)
		if (gr_pipe_bundle_address_value_v(sw_bundle_init->l[i].addr) ==
				GR_GO_IDLE_BUNDLE) {
-			err = gr_gk20a_wait_idle(g);
+			err = g->ops.gr.init.wait_idle(g);
			if (err != 0) {
				goto error;
			}
		}

-		err = gr_gk20a_wait_fe_idle(g);
+		err = g->ops.gr.init.wait_fe_idle(g);
		if (err != 0) {
			goto error;
		}
@@ -1180,7 +1096,7 @@ int gk20a_init_sw_bundle(struct gk20a *g)
	gk20a_writel(g, gr_pipe_bundle_config_r(),
		gr_pipe_bundle_config_override_pipe_mode_disabled_f());

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);

	/* restore fe_go_idle */
	gk20a_writel(g, gr_fe_go_idle_timeout_r(),
@@ -1248,7 +1164,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
		goto clean_up;
	}

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);

	/* load ctx init */
	for (i = 0; i < sw_ctx_load->count; i++) {
@@ -1262,7 +1178,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
	nvgpu_cg_blcg_gr_load_enable(g);

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}
@@ -1285,7 +1201,7 @@ int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
		goto clean_up;
	}

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto restore_fe_go_idle;
	}
@@ -1300,7 +1216,7 @@ restore_fe_go_idle:
	gk20a_writel(g, gr_fe_go_idle_timeout_r(),
		gr_fe_go_idle_timeout_count_prod_f());

-	if ((err != 0) || (gr_gk20a_wait_idle(g) != 0)) {
+	if ((err != 0) || (g->ops.gr.init.wait_idle(g) != 0)) {
		goto clean_up;
	}
@@ -1324,7 +1240,7 @@ restore_fe_go_idle:
			sw_method_init->l[i].addr);
	}

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto clean_up;
	}
@@ -2811,7 +2727,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
		goto out;
	}

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);

out:
	nvgpu_log_fn(g, "done");
	return err;
@@ -2915,7 +2831,7 @@ static int gk20a_init_gr_reset_enable_hw(struct gk20a *g)
		goto out;
	}

-	err = gr_gk20a_wait_idle(g);
+	err = g->ops.gr.init.wait_idle(g);
	if (err != 0) {
		goto out;
	}


@@ -457,7 +457,6 @@ void gk20a_gr_suspend_all_sms(struct gk20a *g,
int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
		struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
-int gr_gk20a_wait_idle(struct gk20a *g);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
		bool *post_event, struct channel_gk20a *fault_ch,
		u32 *hww_global_esr);
@@ -513,8 +512,6 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
u32 gk20a_gr_get_sm_hww_warp_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
u32 gk20a_gr_get_sm_hww_global_esr(struct gk20a *g, u32 gpc, u32 tpc, u32 sm);
-int gr_gk20a_wait_fe_idle(struct gk20a *g);

struct dbg_session_gk20a;
bool gr_gk20a_suspend_context(struct channel_gk20a *ch);


@@ -272,7 +272,7 @@ static const struct gpu_ops gm20b_ops = {
		.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
		.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
-		.wait_empty = gr_gk20a_wait_idle,
+		.wait_empty = gm20b_gr_init_wait_idle,
		.init_cyclestats = gr_gm20b_init_cyclestats,
		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
		.bpt_reg_info = gr_gm20b_bpt_reg_info,
@@ -428,6 +428,8 @@ static const struct gpu_ops gm20b_ops = {
				gm20b_gr_init_fe_pwr_mode_force_on,
			.override_context_reset =
				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
		},
	},
	.fb = {


@@ -477,6 +477,8 @@ static const struct gpu_ops gp10b_ops = {
				gm20b_gr_init_fe_pwr_mode_force_on,
			.override_context_reset =
				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
		},
	},
	.fb = {


@@ -613,6 +613,8 @@ static const struct gpu_ops gv100_ops = {
				gm20b_gr_init_fe_pwr_mode_force_on,
			.override_context_reset =
				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
		},
	},
	.fb = {


@@ -41,6 +41,7 @@
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr.h>
#include <nvgpu/channel.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/engines.h>
@@ -2556,7 +2557,7 @@ static int gv11b_write_bundle_veid_state(struct gk20a *g, u32 index)
			sw_veid_bundle_init->l[index].addr |
			gr_pipe_bundle_address_veid_f(j));

-		err = gr_gk20a_wait_fe_idle(g);
+		err = g->ops.gr.init.wait_idle(g);
	}
	return err;
}
@@ -2586,7 +2587,7 @@ int gr_gv11b_init_sw_veid_bundle(struct gk20a *g)
			nvgpu_log_fn(g, "go idle bundle");
			gk20a_writel(g, gr_pipe_bundle_address_r(),
				sw_veid_bundle_init->l[i].addr);
-			err = gr_gk20a_wait_idle(g);
+			err = g->ops.gr.init.wait_idle(g);
		} else {
			err = gv11b_write_bundle_veid_state(g, i);
		}


@@ -570,6 +570,8 @@ static const struct gpu_ops gv11b_ops = {
				gm20b_gr_init_fe_pwr_mode_force_on,
			.override_context_reset =
				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
		},
	},
	.fb = {

View File

@@ -24,6 +24,8 @@
#include <nvgpu/io.h>
#include <nvgpu/timers.h>
#include <nvgpu/enabled.h>
+#include <nvgpu/engine_status.h>
+#include <nvgpu/gr/gr.h>

#include "gr_init_gm20b.h"
@@ -33,6 +35,97 @@
#define FE_PWR_MODE_TIMEOUT_DEFAULT_US 10U
#define FECS_CTXSW_RESET_DELAY_US 10U

+int gm20b_gr_init_wait_idle(struct gk20a *g)
+{
+	u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
+	u32 gr_engine_id;
+	int err = -EAGAIN;
+	bool ctxsw_active;
+	bool gr_busy;
+	bool ctx_status_invalid;
+	struct nvgpu_engine_status_info engine_status;
+	struct nvgpu_timeout timeout;
+
+	nvgpu_log_fn(g, " ");
+
+	gr_engine_id = nvgpu_engine_get_gr_id(g);
+
+	err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
+			NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		return err;
+	}
+
+	do {
+		/*
+		 * fmodel: host gets fifo_engine_status(gr) from gr
+		 * only when gr_status is read
+		 */
+		(void) nvgpu_readl(g, gr_status_r());
+
+		g->ops.engine_status.read_engine_status_info(g, gr_engine_id,
+			&engine_status);
+		ctxsw_active = engine_status.ctxsw_in_progress;
+
+		ctx_status_invalid = nvgpu_engine_status_is_ctxsw_invalid(
+			&engine_status);
+
+		gr_busy = (nvgpu_readl(g, gr_engine_status_r()) &
+			gr_engine_status_value_busy_f()) != 0U;
+
+		if (ctx_status_invalid || (!gr_busy && !ctxsw_active)) {
+			nvgpu_log_fn(g, "done");
+			return 0;
+		}
+
+		nvgpu_usleep_range(delay, delay * 2U);
+		delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	nvgpu_err(g, "timeout, ctxsw busy : %d, gr busy : %d",
+		ctxsw_active, gr_busy);
+
+	return err;
+}
+
+int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
+{
+	u32 val;
+	u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
+	struct nvgpu_timeout timeout;
+	int err = -EAGAIN;
+
+	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
+		return 0;
+	}
+
+	nvgpu_log_fn(g, " ");
+
+	err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
+			NVGPU_TIMER_CPU_TIMER);
+	if (err != 0) {
+		return err;
+	}
+
+	do {
+		val = nvgpu_readl(g, gr_status_r());
+
+		if (gr_status_fe_method_lower_v(val) == 0U) {
+			nvgpu_log_fn(g, "done");
+			return 0;
+		}
+
+		nvgpu_usleep_range(delay, delay * 2U);
+		delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	nvgpu_err(g, "timeout, fe busy : %x", val);
+
+	return err;
+}
+
int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on)
{
	struct nvgpu_timeout timeout;
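
Both new HALs poll with the same shape: read status, sleep for a doubling interval capped at a maximum, and give up with an error once a CPU timer expires. Here is a self-contained sketch of that backoff loop, with hypothetical constants and a stubbed idle check standing in for the gr_status_r()/gr_engine_status_r() register reads:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    /* Hypothetical stand-ins for NVGPU_GR_IDLE_CHECK_DEFAULT_US and _MAX_US. */
    #define IDLE_CHECK_DEFAULT_US	100U
    #define IDLE_CHECK_MAX_US	5000U
    #define IDLE_TIMEOUT_MS		3000L

    /* Stub for the hardware idle check; reports idle on the fifth poll. */
    static bool hw_is_idle(void)
    {
    	static int polls;

    	return ++polls >= 5;
    }

    static long elapsed_ms(const struct timespec *start)
    {
    	struct timespec now;

    	clock_gettime(CLOCK_MONOTONIC, &now);
    	return (now.tv_sec - start->tv_sec) * 1000L +
    	       (now.tv_nsec - start->tv_nsec) / 1000000L;
    }

    static int wait_idle(void)
    {
    	unsigned int delay = IDLE_CHECK_DEFAULT_US;
    	struct timespec start;

    	clock_gettime(CLOCK_MONOTONIC, &start);
    	do {
    		if (hw_is_idle())
    			return 0;

    		/* Sleep, then double the delay, capped at the maximum. */
    		usleep(delay);
    		delay = (delay * 2U < IDLE_CHECK_MAX_US) ?
    			delay * 2U : IDLE_CHECK_MAX_US;
    	} while (elapsed_ms(&start) < IDLE_TIMEOUT_MS);

    	return -1;	/* timed out; the driver returns -EAGAIN here */
    }

    int main(void)
    {
    	printf("wait_idle() = %d\n", wait_idle());
    	return 0;
    }

The doubling delay keeps the loop cheap when the engine idles quickly, since the first checks are only microseconds apart, while bounding the polling rate during a long wait; that is the same trade-off min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US) makes above.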


@@ -27,6 +27,8 @@
struct gk20a;

+int gm20b_gr_init_wait_idle(struct gk20a *g);
+int gm20b_gr_init_wait_fe_idle(struct gk20a *g);
int gm20b_gr_init_fe_pwr_mode_force_on(struct gk20a *g, bool force_on);
void gm20b_gr_init_override_context_reset(struct gk20a *g);


@@ -636,6 +636,8 @@ struct gpu_ops {
		} hwpm_map;
		struct {
+			int (*wait_idle)(struct gk20a *g);
+			int (*wait_fe_idle)(struct gk20a *g);
			int (*fe_pwr_mode_force_on)(struct gk20a *g,
				bool force_on);
			void (*override_context_reset)(struct gk20a *g);


@@ -30,6 +30,7 @@
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
+#include <nvgpu/gr/gr.h>

#include "gk20a/gr_gk20a.h"
#include "gk20a/gr_pri_gk20a.h"
@@ -111,9 +112,9 @@ int gr_tu104_init_sw_bundle64(struct gk20a *g)
		if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
				== GR_GO_IDLE_BUNDLE) {
-			err = gr_gk20a_wait_idle(g);
+			err = g->ops.gr.init.wait_idle(g);
		} else if (nvgpu_platform_is_silicon(g)) {
-			err = gr_gk20a_wait_fe_idle(g);
+			err = g->ops.gr.init.wait_fe_idle(g);
		}

		if (err != 0) {
			break;


@@ -639,6 +639,8 @@ static const struct gpu_ops tu104_ops = {
				gm20b_gr_init_fe_pwr_mode_force_on,
			.override_context_reset =
				gm20b_gr_init_override_context_reset,
+			.wait_idle = gm20b_gr_init_wait_idle,
+			.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
		},
	},
	.fb = {