mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: move wait_empty hal to hal.gr.init
Move the wait_empty HAL function to hal.gr.init. Remove the gr_gv11b_wait_empty HAL function, as it uses the same implementation as gp10b_gr_init_wait_empty and has no register differences. JIRA NVGPU-2951 Change-Id: I4035e7cc5bf1510db9a250747467a873777526cf Signed-off-by: Vinod G <vinodg@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2075950 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
bd668dddc7
commit
f8b7a4f6d2
@@ -147,7 +147,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
||||
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
|
||||
.wait_empty = NULL,
|
||||
.init_cyclestats = vgpu_gr_init_cyclestats,
|
||||
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
|
||||
.bpt_reg_info = NULL,
|
||||
|
||||
@@ -165,7 +165,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.get_fbp_en_mask = vgpu_gr_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = vgpu_gr_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
|
||||
.wait_empty = NULL,
|
||||
.init_cyclestats = vgpu_gr_init_cyclestats,
|
||||
.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
|
||||
.bpt_reg_info = NULL,
|
||||
|
||||
@@ -4171,7 +4171,7 @@ int gk20a_gr_suspend(struct gk20a *g)
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
ret = g->ops.gr.wait_empty(g);
|
||||
ret = g->ops.gr.init.wait_empty(g);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -270,7 +270,6 @@ static const struct gpu_ops gm20b_ops = {
|
||||
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
|
||||
.wait_empty = gm20b_gr_init_wait_idle,
|
||||
.init_cyclestats = gr_gm20b_init_cyclestats,
|
||||
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
|
||||
.bpt_reg_info = gr_gm20b_bpt_reg_info,
|
||||
@@ -425,6 +424,7 @@ static const struct gpu_ops gm20b_ops = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.wait_empty = gm20b_gr_init_wait_idle,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
.fe_pwr_mode_force_on =
|
||||
|
||||
@@ -1207,67 +1207,6 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool gr_activity_empty_or_preempted(u32 val)
|
||||
{
|
||||
while(val != 0U) {
|
||||
u32 v = val & 7U;
|
||||
if (v != gr_activity_4_gpc0_empty_v() &&
|
||||
v != gr_activity_4_gpc0_preempted_v()) {
|
||||
return false;
|
||||
}
|
||||
val >>= 3;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int gr_gp10b_wait_empty(struct gk20a *g)
|
||||
{
|
||||
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
||||
bool ctxsw_active;
|
||||
bool gr_busy;
|
||||
u32 gr_status;
|
||||
u32 activity0, activity1, activity2, activity4;
|
||||
struct nvgpu_timeout timeout;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
|
||||
do {
|
||||
/* fmodel: host gets fifo_engine_status(gr) from gr
|
||||
only when gr_status is read */
|
||||
gr_status = gk20a_readl(g, gr_status_r());
|
||||
|
||||
ctxsw_active = (gr_status & BIT32(7)) != 0U;
|
||||
|
||||
activity0 = gk20a_readl(g, gr_activity_0_r());
|
||||
activity1 = gk20a_readl(g, gr_activity_1_r());
|
||||
activity2 = gk20a_readl(g, gr_activity_2_r());
|
||||
activity4 = gk20a_readl(g, gr_activity_4_r());
|
||||
|
||||
gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
|
||||
gr_activity_empty_or_preempted(activity1) &&
|
||||
activity2 == 0U &&
|
||||
gr_activity_empty_or_preempted(activity4));
|
||||
|
||||
if (!gr_busy && !ctxsw_active) {
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_usleep_range(delay, delay * 2U);
|
||||
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
nvgpu_err(g,
|
||||
"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
|
||||
ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
u64 addr, bool patch)
|
||||
|
||||
@@ -97,7 +97,6 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx);
|
||||
int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
int gr_gp10b_wait_empty(struct gk20a *g);
|
||||
void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
u64 addr, bool patch);
|
||||
|
||||
@@ -292,7 +292,6 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
|
||||
.wait_empty = gr_gp10b_wait_empty,
|
||||
.init_cyclestats = gr_gm20b_init_cyclestats,
|
||||
.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
|
||||
.bpt_reg_info = gr_gm20b_bpt_reg_info,
|
||||
@@ -495,6 +494,7 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
.fe_pwr_mode_force_on =
|
||||
|
||||
@@ -397,7 +397,6 @@ static const struct gpu_ops gv100_ops = {
|
||||
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
|
||||
.wait_empty = gr_gv11b_wait_empty,
|
||||
.init_cyclestats = gr_gm20b_init_cyclestats,
|
||||
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
|
||||
.bpt_reg_info = gv11b_gr_bpt_reg_info,
|
||||
@@ -632,6 +631,7 @@ static const struct gpu_ops gv100_ops = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
.fe_pwr_mode_force_on =
|
||||
|
||||
@@ -1825,73 +1825,6 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool gr_activity_empty_or_preempted(u32 val)
|
||||
{
|
||||
while (val != 0U) {
|
||||
u32 v = val & 7U;
|
||||
if (v != gr_activity_4_gpc0_empty_v() &&
|
||||
v != gr_activity_4_gpc0_preempted_v()) {
|
||||
return false;
|
||||
}
|
||||
val >>= 3;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int gr_gv11b_wait_empty(struct gk20a *g)
|
||||
{
|
||||
u32 delay = GR_IDLE_CHECK_DEFAULT;
|
||||
bool ctxsw_active;
|
||||
bool gr_busy;
|
||||
u32 gr_status;
|
||||
u32 activity0, activity1, activity2, activity4;
|
||||
struct nvgpu_timeout timeout;
|
||||
int err;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "timeout_init failed: %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
do {
|
||||
/* fmodel: host gets fifo_engine_status(gr) from gr
|
||||
only when gr_status is read */
|
||||
gr_status = gk20a_readl(g, gr_status_r());
|
||||
|
||||
ctxsw_active = (gr_status & BIT32(7)) != 0U;
|
||||
|
||||
activity0 = gk20a_readl(g, gr_activity_0_r());
|
||||
activity1 = gk20a_readl(g, gr_activity_1_r());
|
||||
activity2 = gk20a_readl(g, gr_activity_2_r());
|
||||
activity4 = gk20a_readl(g, gr_activity_4_r());
|
||||
|
||||
gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
|
||||
gr_activity_empty_or_preempted(activity1) &&
|
||||
activity2 == 0U &&
|
||||
gr_activity_empty_or_preempted(activity4));
|
||||
|
||||
if (!gr_busy && !ctxsw_active) {
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_usleep_range(delay, delay * 2U);
|
||||
delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
|
||||
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
nvgpu_err(g,
|
||||
"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
|
||||
ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
u64 addr, bool patch)
|
||||
|
||||
@@ -106,7 +106,6 @@ void gr_gv11b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
|
||||
void gr_gv11b_set_circular_buffer_size(struct gk20a *g, u32 data);
|
||||
int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
int gr_gv11b_wait_empty(struct gk20a *g);
|
||||
void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
u64 addr, bool patch);
|
||||
|
||||
@@ -349,7 +349,6 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gv11b_gr_init_sm_dsm_reg_info,
|
||||
.wait_empty = gr_gv11b_wait_empty,
|
||||
.init_cyclestats = gr_gm20b_init_cyclestats,
|
||||
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
|
||||
.bpt_reg_info = gv11b_gr_bpt_reg_info,
|
||||
@@ -591,6 +590,7 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
.fe_pwr_mode_force_on =
|
||||
|
||||
@@ -445,7 +445,7 @@ int gm20b_gr_init_wait_fe_idle(struct gk20a *g)
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvgpu/io.h>
|
||||
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
|
||||
#include "gr_init_gm20b.h"
|
||||
#include "gr_init_gp10b.h"
|
||||
@@ -79,6 +80,73 @@ int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool gr_activity_empty_or_preempted(u32 val)
|
||||
{
|
||||
while (val != 0U) {
|
||||
u32 v = val & 7U;
|
||||
|
||||
if (v != gr_activity_4_gpc0_empty_v() &&
|
||||
v != gr_activity_4_gpc0_preempted_v()) {
|
||||
return false;
|
||||
}
|
||||
val >>= 3;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int gp10b_gr_init_wait_empty(struct gk20a *g)
|
||||
{
|
||||
u32 delay = NVGPU_GR_IDLE_CHECK_DEFAULT_US;
|
||||
bool ctxsw_active;
|
||||
bool gr_busy;
|
||||
u32 gr_status;
|
||||
u32 activity0, activity1, activity2, activity4;
|
||||
struct nvgpu_timeout timeout;
|
||||
int err;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_timeout_init(g, &timeout, nvgpu_gr_get_idle_timeout(g),
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "timeout_init failed: %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
do {
|
||||
/* fmodel: host gets fifo_engine_status(gr) from gr
|
||||
only when gr_status is read */
|
||||
gr_status = nvgpu_readl(g, gr_status_r());
|
||||
|
||||
ctxsw_active = (gr_status & BIT32(7)) != 0U;
|
||||
|
||||
activity0 = nvgpu_readl(g, gr_activity_0_r());
|
||||
activity1 = nvgpu_readl(g, gr_activity_1_r());
|
||||
activity2 = nvgpu_readl(g, gr_activity_2_r());
|
||||
activity4 = nvgpu_readl(g, gr_activity_4_r());
|
||||
|
||||
gr_busy = !(gr_activity_empty_or_preempted(activity0) &&
|
||||
gr_activity_empty_or_preempted(activity1) &&
|
||||
activity2 == 0U &&
|
||||
gr_activity_empty_or_preempted(activity4));
|
||||
|
||||
if (!gr_busy && !ctxsw_active) {
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_usleep_range(delay, delay * 2U);
|
||||
delay = min_t(u32, delay << 1, NVGPU_GR_IDLE_CHECK_MAX_US);
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
nvgpu_err(g,
|
||||
"timeout, ctxsw busy : %d, gr busy : %d, %08x, %08x, %08x, %08x",
|
||||
ctxsw_active, gr_busy, activity0, activity1, activity2, activity4);
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
int gp10b_gr_init_fs_state(struct gk20a *g)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
@@ -30,6 +30,7 @@ struct gk20a;
|
||||
u32 gp10b_gr_init_get_sm_id_size(void);
|
||||
int gp10b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id,
|
||||
struct nvgpu_gr_config *gr_config);
|
||||
int gp10b_gr_init_wait_empty(struct gk20a *g);
|
||||
int gp10b_gr_init_fs_state(struct gk20a *g);
|
||||
int gp10b_gr_init_preemption_state(struct gk20a *g, u32 gfxp_wfi_timeout_count,
|
||||
bool gfxp_wfi_timeout_unit_usec);
|
||||
|
||||
@@ -354,7 +354,6 @@ struct gpu_ops {
|
||||
u32* (*get_rop_l2_en_mask)(struct gk20a *g);
|
||||
void (*init_sm_dsm_reg_info)(void);
|
||||
void (*init_ovr_sm_dsm_perf)(void);
|
||||
int (*wait_empty)(struct gk20a *g);
|
||||
void (*init_cyclestats)(struct gk20a *g);
|
||||
int (*set_sm_debug_mode)(struct gk20a *g, struct channel_gk20a *ch,
|
||||
u64 sms, bool enable);
|
||||
@@ -678,6 +677,7 @@ struct gpu_ops {
|
||||
void (*cwd_gpcs_tpcs_num)(struct gk20a *g,
|
||||
u32 gpc_count,
|
||||
u32 tpc_count);
|
||||
int (*wait_empty)(struct gk20a *g);
|
||||
int (*wait_idle)(struct gk20a *g);
|
||||
int (*wait_fe_idle)(struct gk20a *g);
|
||||
int (*fe_pwr_mode_force_on)(struct gk20a *g,
|
||||
|
||||
@@ -417,7 +417,6 @@ static const struct gpu_ops tu104_ops = {
|
||||
.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask,
|
||||
.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask,
|
||||
.init_sm_dsm_reg_info = gr_tu104_init_sm_dsm_reg_info,
|
||||
.wait_empty = gr_gv11b_wait_empty,
|
||||
.init_cyclestats = gr_gm20b_init_cyclestats,
|
||||
.set_sm_debug_mode = gv11b_gr_set_sm_debug_mode,
|
||||
.bpt_reg_info = gv11b_gr_bpt_reg_info,
|
||||
@@ -660,6 +659,7 @@ static const struct gpu_ops tu104_ops = {
|
||||
.pd_tpc_per_gpc = gm20b_gr_init_pd_tpc_per_gpc,
|
||||
.pd_skip_table_gpc = gm20b_gr_init_pd_skip_table_gpc,
|
||||
.cwd_gpcs_tpcs_num = gm20b_gr_init_cwd_gpcs_tpcs_num,
|
||||
.wait_empty = gp10b_gr_init_wait_empty,
|
||||
.wait_idle = gm20b_gr_init_wait_idle,
|
||||
.wait_fe_idle = gm20b_gr_init_wait_fe_idle,
|
||||
.fe_pwr_mode_force_on =
|
||||
|
||||
Reference in New Issue
Block a user