gpu: nvgpu: Idle GR before calling PMU ZBC save

On gk20a, when the PMU updates ZBC colors it reads them from L2.
But L2 has a single port, so the ZBC reads can race with other transactions.
Idle graphics before sending the PMU the ZBC_UPDATE request.

Also make pmu_save_zbc a HAL op, because the PMU ucode has changes that
bypass this problem on some chips.

Bug 1746047

Change-Id: Id8fcd6850af7ef1d8f0a6aafa0fe6b4f88b5f2d9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1129017
This commit is contained in:
Terje Bergstrom
2016-04-19 10:27:11 -07:00
parent b10e02f537
commit ec62c649b5
4 changed files with 79 additions and 1 deletions

View File

@@ -188,6 +188,9 @@ struct gpu_ops {
struct zbc_entry *zbc_val); struct zbc_entry *zbc_val);
int (*zbc_query_table)(struct gk20a *g, struct gr_gk20a *gr, int (*zbc_query_table)(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_query_params *query_params); struct zbc_query_params *query_params);
void (*pmu_save_zbc)(struct gk20a *g, u32 entries);
int (*add_zbc)(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *zbc_val);
u32 (*pagepool_default_size)(struct gk20a *g); u32 (*pagepool_default_size)(struct gk20a *g);
int (*init_ctx_state)(struct gk20a *g); int (*init_ctx_state)(struct gk20a *g);
int (*alloc_gr_ctx)(struct gk20a *g, int (*alloc_gr_ctx)(struct gk20a *g,

View File

@@ -3748,6 +3748,40 @@ int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
return 0; return 0;
} }
/*
 * Ask the PMU to save the ZBC table while graphics is quiesced.
 *
 * On gk20a the PMU reads the ZBC colors back from L2, and those reads can
 * race with other L2 transactions. To avoid the race, GR engine activity is
 * disabled and GR is drained before gk20a_pmu_save_zbc() is called. Engine
 * activity is re-enabled on every path that managed to disable it.
 *
 * @g:       GPU instance.
 * @entries: forwarded unchanged to gk20a_pmu_save_zbc().
 *
 * The function returns nothing: each failure is logged, and if GR cannot be
 * disabled or idled the PMU request is skipped.
 */
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_engine_info_gk20a *gr_info =
		f->engine_info + ENGINE_GR_GK20A;
	unsigned long end_jiffies;
	int ret;

	ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to disable gr engine activity");
		return;
	}

	/*
	 * Start the idle deadline only now, so that the time spent disabling
	 * engine activity is not taken out of the GR idle timeout.
	 */
	end_jiffies = jiffies +
		msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));

	ret = g->ops.gr.wait_empty(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to idle graphics");
		goto clean_up;
	}

	/* update zbc */
	gk20a_pmu_save_zbc(g, entries);

clean_up:
	/* Always undo the disable above, whether or not the update ran. */
	ret = gk20a_fifo_enable_engine_activity(g, gr_info);
	if (ret) {
		gk20a_err(dev_from_gk20a(g),
			"failed to enable gr engine activity\n");
	}
}
int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *zbc_val) struct zbc_entry *zbc_val)
{ {
@@ -3840,7 +3874,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
/* update zbc for elpg only when new entry is added */ /* update zbc for elpg only when new entry is added */
entries = max(gr->max_used_color_index, entries = max(gr->max_used_color_index,
gr->max_used_depth_index); gr->max_used_depth_index);
gk20a_pmu_save_zbc(g, entries); g->ops.gr.pmu_save_zbc(g, entries);
} }
err_mutex: err_mutex:
@@ -3995,6 +4029,40 @@ int gr_gk20a_load_zbc_default_table(struct gk20a *g, struct gr_gk20a *gr)
return 0; return 0;
} }
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *zbc_val)
{
struct fifo_gk20a *f = &g->fifo;
struct fifo_engine_info_gk20a *gr_info = f->engine_info + ENGINE_GR_GK20A;
unsigned long end_jiffies;
int ret;
ret = gk20a_fifo_disable_engine_activity(g, gr_info, true);
if (ret) {
gk20a_err(dev_from_gk20a(g),
"failed to disable gr engine activity");
return ret;
}
end_jiffies = jiffies + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g));
ret = g->ops.gr.wait_empty(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
if (ret) {
gk20a_err(dev_from_gk20a(g),
"failed to idle graphics");
goto clean_up;
}
ret = gr_gk20a_add_zbc(g, gr, zbc_val);
clean_up:
if (gk20a_fifo_enable_engine_activity(g, gr_info)) {
gk20a_err(dev_from_gk20a(g),
"failed to enable gr engine activity");
}
return ret;
}
int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr, int gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *zbc_val) struct zbc_entry *zbc_val)
{ {
@@ -8618,6 +8686,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
gops->gr.zbc_set_table = gk20a_gr_zbc_set_table; gops->gr.zbc_set_table = gk20a_gr_zbc_set_table;
gops->gr.zbc_query_table = gr_gk20a_query_zbc; gops->gr.zbc_query_table = gr_gk20a_query_zbc;
gops->gr.pmu_save_zbc = gr_gk20a_pmu_save_zbc;
gops->gr.add_zbc = _gk20a_gr_zbc_set_table;
gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size; gops->gr.pagepool_default_size = gr_gk20a_pagepool_default_size;
gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx; gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx;

View File

@@ -553,6 +553,9 @@ int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *color_val, u32 index); struct zbc_entry *color_val, u32 index);
int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr, int gr_gk20a_add_zbc_depth(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *depth_val, u32 index); struct zbc_entry *depth_val, u32 index);
int _gk20a_gr_zbc_set_table(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *zbc_val);
void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries);
int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies, int gr_gk20a_wait_idle(struct gk20a *g, unsigned long end_jiffies,
u32 expect_delay); u32 expect_delay);
int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, int gr_gk20a_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,

View File

@@ -1372,6 +1372,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth; gops->gr.add_zbc_depth = gr_gk20a_add_zbc_depth;
gops->gr.zbc_set_table = gk20a_gr_zbc_set_table; gops->gr.zbc_set_table = gk20a_gr_zbc_set_table;
gops->gr.zbc_query_table = gr_gk20a_query_zbc; gops->gr.zbc_query_table = gr_gk20a_query_zbc;
gops->gr.pmu_save_zbc = gk20a_pmu_save_zbc;
gops->gr.add_zbc = gr_gk20a_add_zbc;
gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size; gops->gr.pagepool_default_size = gr_gm20b_pagepool_default_size;
gops->gr.init_ctx_state = gr_gk20a_init_ctx_state; gops->gr.init_ctx_state = gr_gk20a_init_ctx_state;
gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx; gops->gr.alloc_gr_ctx = gr_gm20b_alloc_gr_ctx;