video: tegra: host: gk20a: add class perf settings

Add a place to edit context-switched perf settings based on class.
Disable tex-lock for the compute class as the first such setting.

Bug 1409041

Change-Id: I5317a2a2e5f855661a1400b42f69211d16ae0c1d
Signed-off-by: Randy Spurlock <rspurlock@nvidia.com>
Reviewed-on: http://git-master/r/405908
(cherry picked from commit 250e149be35ecb8893dcef053ec44ffea86c302a)
Reviewed-on: http://git-master/r/407094
(cherry picked from commit 54337c08cbf6c2c6b5c929c1be24e87165d9d946)
Reviewed-on: http://git-master/r/408837
Reviewed-by: Mandar Padmawar <mpadmawar@nvidia.com>
Tested-by: Mandar Padmawar <mpadmawar@nvidia.com>
2025-12-24 02:22:34 +03:00 · 2014-05-05 18:37:54 -05:00
parent acd6d02069
commit effa9dcfaa
2 changed files with 89 additions and 0 deletions
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2801,6 +2801,42 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a  *c,
 			gr_gk20a_commit_global_ctx_buffers(g, c, true));
 	}

+	/* tweak any perf parameters per-context here */
+	if (args->class_num == KEPLER_COMPUTE_A) {
+		int begin_err;
+		u32 tex_lock_disable_mask =
+			gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m()         |
+			gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m()    |
+			gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m()   |
+			gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_m()     |
+			gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m() |
+			gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m();
+
+		u32 texlock = gk20a_readl(g, gr_gpcs_tpcs_sm_sch_texlock_r());
+
+		texlock = (texlock & ~tex_lock_disable_mask) |
+		(gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f()         |
+		 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_disable_f()    |
+		 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_disable_f()   |
+		 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_disable_f()     |
+		 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f() |
+		 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f());
+
+		begin_err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
+
+		if (!begin_err) {
+			err = gr_gk20a_ctx_patch_write(g, ch_ctx,
+				gr_gpcs_tpcs_sm_sch_texlock_r(),
+				texlock, true);
+		}
+		if ((begin_err || err)) {
+			gk20a_err(dev_from_gk20a(g),
+				   "failed to set texlock for compute class");
+		}
+		if (!begin_err)
+			gr_gk20a_ctx_patch_write_end(g, ch_ctx);
+	}
+
 	/* init golden image, ELPG enabled after this is done */
 	err = gr_gk20a_init_golden_ctx_image(g, c);
 	if (err) {
@@ -4072,6 +4108,7 @@ static void gk20a_gr_enable_gpc_exceptions(struct gk20a *g)
 		gr_gpc0_gpccs_gpc_exception_en_tpc_0_enabled_f());
 }

+
 void gr_gk20a_enable_hww_exceptions(struct gk20a *g)
 {
 	/* enable exceptions */
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -3190,4 +3190,56 @@ static inline u32 gr_gpc0_tpc0_l1c_dbg_cya15_en_f(void)
 {
 	return 0x8000000;
 }
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_r(void)
+{
+	return 0x00419ec8;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_m(void)
+{
+	return 0x1 << 0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tile_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_m(void)
+{
+	return 0x1 << 2;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_phase_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_m(void)
+{
+	return 0x1 << 3;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_tex_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_m(void)
+{
+	return 0xff << 4;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_tex_hash_timeout_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_m(void)
+{
+	return 0x1 << 16;
+}
+static inline u32 gr_gpcs_tpcs_sm_sch_texlock_dot_t_unlock_disable_f(void)
+{
+	return 0x0;
+}
 #endif