gpu: nvgpu: Add czf_bypass sysfs node for gp10b

This change adds a new sysfs node that allows configuring CZF_BYPASS,
for platforms with low context-switching latency requirements.

/sys/devices/17000000.gp10b/czf_bypass

Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never

The specified value will apply only to newly allocated contexts.
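
For illustration only (this helper is not part of the change), a
minimal userspace sketch that selects the "never" policy before any
new contexts are created, assuming the node path above and root
privileges:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* sysfs node added by this change; writing requires root */
        const char *node = "/sys/devices/17000000.gp10b/czf_bypass";
        int fd = open(node, O_WRONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* 3 == never (see table above); only contexts allocated
         * after this write pick up the new setting */
        if (write(fd, "3", 1) != 1)
            perror("write");
        close(fd);
        return 0;
    }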

Bug 1914014

Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb38)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

6 changed files with 118 additions and 29 deletions

@@ -366,6 +366,8 @@ struct gpu_ops {
 		int (*resume_from_pause)(struct gk20a *g);
 		int (*clear_sm_errors)(struct gk20a *g);
 		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
+		int (*set_czf_bypass)(struct gk20a *g,
+				struct channel_gk20a *ch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);

@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
-			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			    struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			    u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			    bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		   num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
-	}
 	return err;
 }
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+				      num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
+	}
+
+	return err;
+}
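
A note on the split above: the new czf_bypass hook hardcodes
ch_is_curr_ctx = false, since a context being configured at allocation
time is not yet resident on the GR engine; it can therefore call the
inner function directly and skip the ctxsw disable/enable round trip
that the gr_gk20a_exec_ctx_ops() wrapper performs, e.g. (mirroring the
call in gr_gp10b_set_czf_bypass() further down):

    /* context known not to be resident: patch the saved ctx image */
    __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);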

@@ -320,6 +320,7 @@ struct gr_gk20a {
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
+	u32 czf_bypass;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
 		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+		bool ch_is_curr_ctx);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 		u32 addr,
 		u32 max_offsets,

@@ -1,7 +1,7 @@
 /*
  * GP10B specific sysfs files
  *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,8 @@
 #include "gk20a/gk20a.h"
 #include "gp10b_sysfs.h"
 
+#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
+
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
 static ssize_t ecc_enable_store(struct device *dev,
@@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev,
 
 static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
 
+static ssize_t czf_bypass_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (val >= 4)
+		return -EINVAL;
+
+	g->gr.czf_bypass = val;
+
+	return count;
+}
+
+static ssize_t czf_bypass_read(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	return sprintf(buf, "%d\n", g->gr.czf_bypass);
+}
+
+static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
+
 void gp10b_create_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 	int error = 0;
 
+	g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v();
+
 	error |= device_create_file(dev, &dev_attr_ecc_enable);
+	error |= device_create_file(dev, &dev_attr_czf_bypass);
+
 	if (error)
 		nvgpu_err(g, "Failed to create sysfs attributes!\n");
 }
@@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev)
 void gp10b_remove_sysfs(struct device *dev)
 {
 	device_remove_file(dev, &dev_attr_ecc_enable);
+	device_remove_file(dev, &dev_attr_czf_bypass);
 }

@@ -27,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/regops_gk20a.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gp10b/gr_gp10b.h"
@@ -2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
 }
 
+int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ops.op = REGOP(WRITE_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m();
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_gpc0_prop_debug1_r();
+	ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f(
+			g->gr.czf_bypass);
+
+	return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
@@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
 	gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
 	gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
+	gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
 }
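
For reference, the reg-op built in gr_gp10b_set_czf_bypass() amounts
to a masked read-modify-write of the 2-bit czf_bypass field (bits
15:14) in the context image of GPC0 PROP_DEBUG1; roughly (illustrative
sketch, not driver code):

    /* and_n_mask_lo selects the bits to replace; value_lo supplies
     * the new field value */
    v = (v & ~gr_gpc0_prop_debug1_czf_bypass_m()) |
        gr_gpc0_prop_debug1_czf_bypass_f(g->gr.czf_bypass);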

@@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
 {
 	return 0xff << 0;
 }
+static inline u32 gr_gpc0_prop_debug1_r(void)
+{
+	return 0x00500400;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v)
+{
+	return (v & 0x3) << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void)
+{
+	return 0x3 << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void)
+{
+	return 0x00000001;
+}
 #endif