gpu: nvgpu: move wait_empty hal to hal.gr.init

Move wait_empty hal function to hal.gr.init.
Remove the gv11b_gr_wait_empty hal function, as it uses the same
implementation as gp10b_gr_wait_empty and has no register differences.

JIRA NVGPU-2951

Change-Id: I4035e7cc5bf1510db9a250747467a873777526cf
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2075950
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2019-03-18 18:18:49 -07:00
committed by mobile promotions
parent bd668dddc7
commit f8b7a4f6d2
16 changed files with 77 additions and 140 deletions

View File

@@ -147,7 +147,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = NULL,
.init_cyclestats = vgpu_gr_init_cyclestats,
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
.bpt_reg_info = NULL,

View File

@@ -165,7 +165,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = NULL,
.init_cyclestats = vgpu_gr_init_cyclestats,
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
.bpt_reg_info = NULL,

View File

@@ -4171,7 +4171,7 @@ int gk20a_gr_suspend(struct gk20a *g)
nvgpu_log_fn(g, " ");
ret = g->ops.gr.wait_empty(g);
ret = g->ops.gr.init.wait_empty(g);
if (ret != 0) {
return ret;
}

View File

@@ -270,7 +270,6 @@ static const struct gpu_ops gm20b_ops = {
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gm20b_gr_init_wait_idle,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
@@ -425,6 +424,7 @@ static const struct gpu_ops gm20b_ops = {
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
.wait_empty = gm20b_gr_init_wait_idle,
.wait_idle = gm20b_gr_init_wait_idle,
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
.fe_pwr_mode_force_on =

View File

@@ -1207,67 +1207,6 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
return 0;
}
/*
 * Check the 3-bit fields packed into a GR activity register value.
 *
 * Fields are examined starting from the least significant bits, and the
 * scan stops once no set bits remain in the value. Returns true when every
 * examined field equals either the "empty" or the "preempted" encoding, and
 * false at the first field holding any other value.
 */
static bool gr_activity_empty_or_preempted(u32 val)
{
	u32 pending;

	for (pending = val; pending != 0U; pending >>= 3) {
		u32 field = pending & 7U;

		if (field == gr_activity_4_gpc0_empty_v() ||
		    field == gr_activity_4_gpc0_preempted_v()) {
			continue;
		}

		return false;
	}

	return true;
}
/*
 * Poll the GR engine until it reports empty, or until the GR idle timeout
 * expires.
 *
 * Each iteration reads gr_status and the activity 0/1/2/4 registers. The
 * engine counts as empty when bit 7 of gr_status (context switch active) is
 * clear, activity 2 reads zero, and every field of activity 0, 1 and 4 is
 * either "empty" or "preempted". Between polls the function sleeps, doubling
 * the delay each time up to GR_IDLE_CHECK_MAX.
 *
 * Returns 0 once the engine is empty, the error from nvgpu_timeout_init()
 * if the timer cannot be set up, or -EAGAIN when the timeout expires first.
 */
int gr_gp10b_wait_empty(struct gk20a *g)
{
	u32 delay = GR_IDLE_CHECK_DEFAULT;
	bool ctxsw_active;
	bool gr_busy;
	u32 gr_status;
	u32 activity0, activity1, activity2, activity4;
	struct nvgpu_timeout timeout;
	int err;

	nvgpu_log_fn(g, " ");

	/* Do not poll against a timer that failed to initialize. */
	err = nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
				 NVGPU_TIMER_CPU_TIMER);
	if (err != 0) {
		nvgpu_err(g, "timeout_init failed: %d", err);
		return err;
	}

	do {
		/*
		 * fmodel: host gets fifo_engine_status(gr) from gr
		 * only when gr_status is read
		 */
		gr_status = gk20a_readl(g, gr_status_r());

		ctxsw_active = (gr_status & BIT32(7)) != 0U;

		activity0 = gk20a_readl(g, gr_activity_0_r());
		activity1 = gk20a_readl(g, gr_activity_1_r());
		activity2 = gk20a_readl(g, gr_activity_2_r());
		activity4 = gk20a_readl(g, gr_activity_4_r());

		gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
			    gr_activity_empty_or_preempted(activity1) &&
			    activity2 == 0U &&
			    gr_activity_empty_or_preempted(activity4));

		if (!gr_busy && !ctxsw_active) {
			nvgpu_log_fn(g, "done");
			return 0;
		}

		/* Back off exponentially, capped at GR_IDLE_CHECK_MAX. */
		nvgpu_usleep_range(delay, delay * 2U);
		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
	} while (nvgpu_timeout_expired(&timeout) == 0);

	nvgpu_err(g,
		"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
		ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);

	return -EAGAIN;
}
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch)

View File

@@ -97,7 +97,6 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gp10b_wait_empty(struct gk20a *g);
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch);

View File

@@ -292,7 +292,6 @@ static const struct gpu_ops gp10b_ops = {
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
.wait_empty = gr_gp10b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
.bpt_reg_info = gr_gm20b_bpt_reg_info,
@@ -495,6 +494,7 @@ static const struct gpu_ops gp10b_ops = {
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
.wait_empty = gp10b_gr_init_wait_empty,
.wait_idle = gm20b_gr_init_wait_idle,
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
.fe_pwr_mode_force_on =

View File

@@ -397,7 +397,6 @@ static const struct gpu_ops gv100_ops = {
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
@@ -632,6 +631,7 @@ static const struct gpu_ops gv100_ops = {
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
.wait_empty = gp10b_gr_init_wait_empty,
.wait_idle = gm20b_gr_init_wait_idle,
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
.fe_pwr_mode_force_on =

View File

@@ -1825,73 +1825,6 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
return 0;
}
/*
 * Report whether all 3-bit fields of a GR activity register value are in
 * the "empty" or "preempted" state.
 *
 * The value is consumed three bits at a time from the low end until it
 * becomes zero. Any field with a different encoding yields false.
 */
static bool gr_activity_empty_or_preempted(u32 val)
{
	u32 bits = val;

	while (bits != 0U) {
		const u32 state = bits & 7U;
		const bool quiescent =
			(state == gr_activity_4_gpc0_empty_v()) ||
			(state == gr_activity_4_gpc0_preempted_v());

		if (!quiescent) {
			return false;
		}

		bits >>= 3;
	}

	return true;
}
/*
 * Wait for the GR engine to become empty, bounded by the GR idle timeout.
 *
 * Every poll reads gr_status first and then the activity 0/1/2/4 registers.
 * The wait ends successfully when bit 7 of gr_status is clear, activity 2
 * is zero, and activity 0, 1 and 4 contain only "empty" or "preempted"
 * fields. The sleep between polls doubles each round, capped at
 * GR_IDLE_CHECK_MAX.
 *
 * Returns 0 on success, the nvgpu_timeout_init() error if the timer could
 * not be initialized, or -EAGAIN if the timeout expired.
 */
int gr_gv11b_wait_empty(struct gk20a *g)
{
	struct nvgpu_timeout timeout;
	u32 backoff = GR_IDLE_CHECK_DEFAULT;
	u32 status;
	u32 act0, act1, act2, act4;
	bool ctxsw_active;
	bool gr_busy;
	int err;

	nvgpu_log_fn(g, " ");

	err = nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
				 NVGPU_TIMER_CPU_TIMER);
	if (err != 0) {
		nvgpu_err(g, "timeout_init failed: %d", err);
		return err;
	}

	do {
		/*
		 * fmodel: host gets fifo_engine_status(gr) from gr
		 * only when gr_status is read
		 */
		status = gk20a_readl(g, gr_status_r());
		ctxsw_active = (status & BIT32(7)) != 0U;

		act0 = gk20a_readl(g, gr_activity_0_r());
		act1 = gk20a_readl(g, gr_activity_1_r());
		act2 = gk20a_readl(g, gr_activity_2_r());
		act4 = gk20a_readl(g, gr_activity_4_r());

		/* Busy as soon as any one activity register is not quiescent. */
		gr_busy = !gr_activity_empty_or_preempted(act0) ||
			  !gr_activity_empty_or_preempted(act1) ||
			  act2 != 0U ||
			  !gr_activity_empty_or_preempted(act4);

		if (!(gr_busy || ctxsw_active)) {
			nvgpu_log_fn(g, "done");
			return 0;
		}

		nvgpu_usleep_range(backoff, backoff * 2U);
		backoff = min_t(u32, backoff << 1, GR_IDLE_CHECK_MAX);
	} while (nvgpu_timeout_expired(&timeout) == 0);

	nvgpu_err(g,
		"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
		ctxsw_active, gr_busy, act0, act1, act2, act4);

	return -EAGAIN;
}
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch)

View File

@@ -106,7 +106,6 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data);
int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
struct gk20a_debug_output *o);
int gr_gv11b_wait_empty(struct gk20a *g);
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx,
u64 addr, bool patch);

View File

@@ -349,7 +349,6 @@ static const struct gpu_ops gv11b_ops = {
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
@@ -591,6 +590,7 @@ static const struct gpu_ops gv11b_ops = {
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
.wait_empty = gp10b_gr_init_wait_empty,
.wait_idle = gm20b_gr_init_wait_idle,
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
.fe_pwr_mode_force_on =

View File

@@ -445,7 +445,7 @@ int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
nvgpu_log_fn(g, " ");
err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
NVGPU_TIMER_CPU_TIMER);
NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
return err;
}

View File

@@ -24,6 +24,7 @@
#include <nvgpu/io.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr.h>
#include "gr_init_gm20b.h"
#include "gr_init_gp10b.h"
@@ -79,6 +80,73 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
return 0;
}
/*
 * Walk a GR activity register value three bits at a time, lowest field
 * first, until no set bits are left.
 *
 * Returns false as soon as a field is found that is neither the "empty"
 * nor the "preempted" encoding; returns true otherwise.
 */
static bool gr_activity_empty_or_preempted(u32 val)
{
	u32 rest;

	for (rest = val; rest != 0U; rest >>= 3) {
		u32 unit_state = rest & 7U;

		if (unit_state == gr_activity_4_gpc0_empty_v()) {
			continue;
		}
		if (unit_state == gr_activity_4_gpc0_preempted_v()) {
			continue;
		}

		return false;
	}

	return true;
}
/*
 * Poll until the GR engine is empty or the GR idle timeout runs out.
 *
 * gr_status is read first on every iteration, followed by the activity
 * 0/1/2/4 registers. The engine is treated as empty when bit 7 of gr_status
 * is clear, activity 2 reads zero, and all fields of activity 0, 1 and 4 are
 * "empty" or "preempted". The polling interval starts at
 * NVGPU_GR_IDLE_CHECK_DEFAULT_US and doubles per iteration up to
 * NVGPU_GR_IDLE_CHECK_MAX_US.
 *
 * Returns 0 when the engine is empty, the nvgpu_timeout_init() error when
 * the timer cannot be initialized, or -EAGAIN on timeout.
 */
int gp10b_gr_init_wait_empty(struct gk20a *g)
{
	struct nvgpu_timeout timeout;
	u32 poll_us = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
	u32 status;
	u32 act0, act1, act2, act4;
	bool ctxsw_active;
	bool gr_busy;
	int err;

	nvgpu_log_fn(g, " ");

	err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
				 NVGPU_TIMER_CPU_TIMER);
	if (err != 0) {
		nvgpu_err(g, "timeout_init failed: %d", err);
		return err;
	}

	do {
		bool units_quiescent;

		/*
		 * fmodel: host gets fifo_engine_status(gr) from gr
		 * only when gr_status is read
		 */
		status = nvgpu_readl(g, gr_status_r());
		ctxsw_active = (status & BIT32(7)) != 0U;

		act0 = nvgpu_readl(g, gr_activity_0_r());
		act1 = nvgpu_readl(g, gr_activity_1_r());
		act2 = nvgpu_readl(g, gr_activity_2_r());
		act4 = nvgpu_readl(g, gr_activity_4_r());

		units_quiescent = gr_activity_empty_or_preempted(act0) &&
				  gr_activity_empty_or_preempted(act1) &&
				  act2 == 0U &&
				  gr_activity_empty_or_preempted(act4);
		gr_busy = !units_quiescent;

		if (!gr_busy && !ctxsw_active) {
			nvgpu_log_fn(g, "done");
			return 0;
		}

		/* Sleep, then double the interval for the next poll. */
		nvgpu_usleep_range(poll_us, poll_us * 2U);
		poll_us = min_t(u32, poll_us << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
	} while (nvgpu_timeout_expired(&timeout) == 0);

	nvgpu_err(g,
		"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
		ctxsw_active, gr_busy, act0, act1, act2, act4);

	return -EAGAIN;
}
int gp10b_gr_init_fs_state(struct gk20a *g)
{
u32 data;

View File

@@ -30,6 +30,7 @@ struct gk20a;
u32 gp10b_gr_init_get_sm_id_size(void);
int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
struct nvgpu_gr_config *gr_config);
int gp10b_gr_init_wait_empty(struct gk20a *g);
int gp10b_gr_init_fs_state(struct gk20a *g);
int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
bool gfxp_wfi_timeout_unit_usec);

View File

@@ -354,7 +354,6 @@ struct gpu_ops {
u32* (*get_rop_l2_en_mask)(struct gk20a *g);
void (*init_sm_dsm_reg_info)(void);
void (*init_ovr_sm_dsm_perf)(void);
int (*wait_empty)(struct gk20a *g);
void (*init_cyclestats)(struct gk20a *g);
int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch,
u64 sms, bool enable);
@@ -678,6 +677,7 @@ struct gpu_ops {
void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
u32 gpc_count,
u32 tpc_count);
int (*wait_empty)(struct gk20a *g);
int (*wait_idle)(struct gk20a *g);
int (*wait_fe_idle)(struct gk20a *g);
int (*fe_pwr_mode_force_on)(struct gk20a *g,

View File

@@ -417,7 +417,6 @@ static const struct gpu_ops tu104_ops = {
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
.init_sm_dsm_reg_info = gr_tu104_init_sm_dsm_reg_info,
.wait_empty = gr_gv11b_wait_empty,
.init_cyclestats = gr_gm20b_init_cyclestats,
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
.bpt_reg_info = gv11b_gr_bpt_reg_info,
@@ -660,6 +659,7 @@ static const struct gpu_ops tu104_ops = {
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
.wait_empty = gp10b_gr_init_wait_empty,
.wait_idle = gm20b_gr_init_wait_idle,
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
.fe_pwr_mode_force_on =