From aa4daddda23aa44a84464200f497eac802a8e6ce Mon Sep 17 00:00:00 2001
From: Sandeep Shinde
Date: Thu, 24 Aug 2017 12:12:42 +0530
Subject: [PATCH] gpu: nvgpu: Add pd_max_batches sysfs node for gp10b

Add a new sysfs node pd_max_batches for setting max batches value in
NV_PGRAPH_PRI_PD_AB_DIST_CONFIG_1_MAX_BATCHES register which controls
max number of batches per alpha-beta transition stored in PD.

Bug 1927124

Change-Id: I2817f2d70dab348d8b0b8ba19bf1e9b9d23ca907
Signed-off-by: Sandeep Shinde
Reviewed-on: https://git-master.nvidia.com/r/1544104
Reviewed-by: Bharat Nihalani
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h            |  1 +
 drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c         | 42 +++++++++++++++++++
 drivers/gpu/nvgpu/gp10b/gr_gp10b.c            | 24 ++++++++---
 .../include/nvgpu/hw/gp10b/hw_gr_gp10b.h      |  4 ++
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 95a1d045a..7a9ad5c1f 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -321,6 +321,7 @@ struct gr_gk20a {
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
 	u32 czf_bypass;
+	u32 pd_max_batches;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
 
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
index dff569544..adfb4bafb 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c
@@ -79,6 +79,46 @@ static ssize_t czf_bypass_read(struct device *dev,
 
 static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
 
+/*
+ * sysfs store handler for pd_max_batches.
+ *
+ * Parses buf as a decimal number and caches it in g->gr.pd_max_batches.
+ * Input that kstrtoul() rejects, or a value of 64 or more, fails with
+ * -EINVAL. A value of 0 keeps the register's init setting: gr_gp10b.c
+ * programs the cached value only when it is non-zero. Returns count on
+ * success.
+ */
+static ssize_t pd_max_batches_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (val >= 64)
+		return -EINVAL;
+
+	g->gr.pd_max_batches = val;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for pd_max_batches: prints the cached u32 as an
+ * unsigned decimal followed by a newline and returns sprintf()'s result.
+ */
+static ssize_t pd_max_batches_read(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	return sprintf(buf, "%u\n", g->gr.pd_max_batches);
+}
+
+static DEVICE_ATTR(pd_max_batches, ROOTRW, pd_max_batches_read, pd_max_batches_store);
+
 void gp10b_create_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
@@ -88,6 +128,7 @@ void gp10b_create_sysfs(struct device *dev)
 
 	error |= device_create_file(dev, &dev_attr_ecc_enable);
 	error |= device_create_file(dev, &dev_attr_czf_bypass);
+	error |= device_create_file(dev, &dev_attr_pd_max_batches);
 	if (error)
 		dev_err(dev, "Failed to create sysfs attributes!\n");
 }
@@ -96,4 +137,5 @@ void gp10b_remove_sysfs(struct device *dev)
 {
 	device_remove_file(dev, &dev_attr_ecc_enable);
 	device_remove_file(dev, &dev_attr_czf_bypass);
+	device_remove_file(dev, &dev_attr_pd_max_batches);
 }
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 6ceaf621c..befaf2981 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -400,9 +400,15 @@ static int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
 		gr_gpc0_ppc0_cbm_beta_cb_size_v_granularity_v()) /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
-	gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
-		gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+	if (g->gr.pd_max_batches) {
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
+	} else {
+		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
+	}
 
 	attrib_offset_in_chunk = alpha_offset_in_chunk +
 				gr->tpc_count * gr->alpha_cb_size;
@@ -731,9 +737,15 @@ static void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data)
 		gr_gpc0_ppc0_cbm_alpha_cb_size_v_granularity_v() /
 		gr_pd_ab_dist_cfg1_max_output_granularity_v();
 
-	gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
-		gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
-		gr_pd_ab_dist_cfg1_max_batches_init_f());
+	if (g->gr.pd_max_batches) {
+		gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches));
+	} else {
+		gk20a_writel(g, gr_pd_ab_dist_cfg1_r(),
+			gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
+			gr_pd_ab_dist_cfg1_max_batches_init_f());
+	}
 
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
 		stride = gpc_stride * gpc_index;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
index 079e05d11..a8ca9b8da 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
@@ -1662,6 +1662,10 @@ static inline u32 gr_pd_ab_dist_cfg1_r(void)
 {
 	return 0x004064c4;
 }
+static inline u32 gr_pd_ab_dist_cfg1_max_batches_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
 static inline u32 gr_pd_ab_dist_cfg1_max_batches_init_f(void)
 {
 	return 0xffff;