gpu: nvgpu: add HAL to get offset in gpccs segment

In gr_gk20a_find_priv_offset_in_buffer() we currently calculate the offset of a register in the gpccs segment based on the register address type. Separate out the sequence that finds the offset in the gpccs segment and move it to a new API, gr_gk20a_get_offset_in_gpccs_segment(). Introduce a new HAL, gops.gr.get_offset_in_gpccs_segment(), and set the above API to this HAL. Call the HAL from gr_gk20a_find_priv_offset_in_buffer() instead of calling the API directly. Jira NVGPUT-118 Change-Id: I0df798456cf63e3c3a43131f3c4ca7990b89ede0 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1761669 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2018-06-26 03:44:55 -07:00
parent 26fe0fbc92
commit 84db72a21c
10 changed files with 95 additions and 40 deletions
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -492,6 +492,9 @@ struct gpu_ops {
 		int (*commit_global_ctx_buffers)(struct gk20a *g,
 			struct channel_gk20a *c, bool patch);
 		u32 (*get_nonpes_aware_tpc)(struct gk20a *g, u32 gpc, u32 tpc);
 		/*
 		 * Per-chip hook used by gr_gk20a_find_priv_offset_in_buffer()
 		 * to obtain the offset of an address type's registers inside
 		 * the gpccs segment. The result is written through
 		 * __offset_in_segment; the caller treats a non-zero return
 		 * value as an error.
 		 */
 		int (*get_offset_in_gpccs_segment)(struct gk20a *g,
 			int addr_type, u32 num_tpcs, u32 num_ppcs,
 			u32 reg_list_ppc_count, u32 *__offset_in_segment);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -7136,6 +7136,69 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
 	return 0;
 }
 /*
  * Compute the offset, relative to the start of a gpccs segment, at which the
  * registers of the given address type are stored.
  *
  * The offsets below follow the layout described by the per-type comments:
  * TPC data comes first, then EGPC/ETPC data, then PPC data, then GPC data.
  * Each list size is (register count * number of units) << 2, i.e. presumably
  * four bytes per register entry.
  *
  * @g:                   GPU instance; g->gr.ctx_vars.ctxsw_regs supplies the
  *                       tpc/etpc register counts.
  * @addr_type:           one of the CTXSW_ADDR_TYPE_* values.
  * @num_tpcs:            number of TPCs used to scale the TPC/ETPC lists.
  * @num_ppcs:            number of PPCs used to scale the PPC list.
  * @reg_list_ppc_count:  number of entries in the PPC register list.
  * @__offset_in_segment: output; written only on success.
  *
  * Returns 0 on success, or -EINVAL if @addr_type is not one of the handled
  * types (in which case *@__offset_in_segment is left untouched).
  */
 int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
 					int addr_type,
 					u32 num_tpcs,
 					u32 num_ppcs,
 					u32 reg_list_ppc_count,
 					u32 *__offset_in_segment)
 {
 	u32 offset_in_segment = 0;
 	struct gr_gk20a *gr = &g->gr;
 	/* Combined size of the TPC and ETPC register lists for all TPCs. */
 	u32 tpc_etpc_size =
 		(((gr->ctx_vars.ctxsw_regs.tpc.count +
 			gr->ctx_vars.ctxsw_regs.etpc.count) *
 		  num_tpcs) << 2);

 	if (addr_type == CTXSW_ADDR_TYPE_TPC) {
 		/*
 		 * TPC registers (gr->ctx_vars.ctxsw_regs.tpc.l) sit at the
 		 * very start of the segment, so the offset stays 0.
 		 */
 	} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
 			(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
 		/* Skip only the TPC list; ETPC data follows it directly. */
 		offset_in_segment =
 			((gr->ctx_vars.ctxsw_regs.tpc.count *
 				num_tpcs) << 2);

 		/*
 		 * No '#' flag here: the literal "0x" already provides the
 		 * prefix, and using both printed "0x0x..." for non-zero
 		 * values.
 		 */
 		nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
 			"egpc etpc offset_in_segment 0x%08x",
 			offset_in_segment);
 	} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
 		/*
 		 * The ucode stores TPC data before PPC data.
 		 * Advance offset past TPC data to PPC data.
 		 */
 		offset_in_segment = tpc_etpc_size;
 	} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
 		/*
 		 * The ucode stores TPC/PPC data before GPC data.
 		 * Advance offset past TPC/PPC data to GPC data.
 		 *
 		 * Note 1 PES_PER_GPC case: the PPC list is only skipped
 		 * when there is more than one PES per GPC.
 		 */
 		u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
 				GPU_LIT_NUM_PES_PER_GPC);

 		offset_in_segment = tpc_etpc_size;
 		if (num_pes_per_gpc > 1) {
 			offset_in_segment +=
 				((reg_list_ppc_count * num_ppcs) << 2);
 		}
 	} else {
 		nvgpu_log_fn(g, "Unknown address type.");
 		return -EINVAL;
 	}

 	*__offset_in_segment = offset_in_segment;
 	return 0;
 }
 /*
 *  This function will return the 32 bit offset for a priv register if it is
 *  present in the context buffer. The context buffer is in CPU memory.
@@ -7147,7 +7210,6 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 					       u32 context_buffer_size,
 					       u32 *priv_offset)
 {
 	struct gr_gk20a *gr = &g->gr;
 	u32 i, data32;
 	int err;
 	int addr_type; /*enum ctxsw_addr_type */
@@ -7158,7 +7220,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 	u32 sys_priv_offset, gpc_priv_offset;
 	u32 ppc_mask, reg_list_ppc_count;
 	u8 *context;
-	u32 offset_to_segment;
+	u32 offset_to_segment, offset_in_segment = 0;
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
@@ -7266,45 +7328,18 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
 			offset_to_segment = gpc_priv_offset *
 				ctxsw_prog_ucode_header_size_in_bytes();
-			if (addr_type == CTXSW_ADDR_TYPE_TPC) {
+			err = g->ops.gr.get_offset_in_gpccs_segment(g,
-				/*reg = gr->ctx_vars.ctxsw_regs.tpc.l;*/
+					addr_type,
-			} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
+					num_tpcs, num_ppcs, reg_list_ppc_count,
-					(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
+					&offset_in_segment);
-				nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
+			if (err)
 					"egpc etpc offset_to_segment 0x%#08x",
 					offset_to_segment);
 				offset_to_segment +=
 					((gr->ctx_vars.ctxsw_regs.tpc.count *
 					  num_tpcs) << 2);
 			} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
 				/* The ucode stores TPC data before PPC data.
 				 * Advance offset past TPC data to PPC data. */
 				offset_to_segment +=
 					(((gr->ctx_vars.ctxsw_regs.tpc.count +
 						gr->ctx_vars.ctxsw_regs.etpc.count) *
 					  num_tpcs) << 2);
 			} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
 				/* The ucode stores TPC/PPC data before GPC data.
 				 * Advance offset past TPC/PPC data to GPC data. */
 				/* note 1 PES_PER_GPC case */
 				u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
 						GPU_LIT_NUM_PES_PER_GPC);
 				if (num_pes_per_gpc > 1) {
 					offset_to_segment +=
 						((((gr->ctx_vars.ctxsw_regs.tpc.count +
 							gr->ctx_vars.ctxsw_regs.etpc.count) *
 							num_tpcs) << 2) +
 							((reg_list_ppc_count * num_ppcs) << 2));
 				} else {
 					offset_to_segment +=
 						(((gr->ctx_vars.ctxsw_regs.tpc.count +
 							gr->ctx_vars.ctxsw_regs.etpc.count) *
 							num_tpcs) << 2);
 				}
 			} else {
 				nvgpu_log_fn(g, "Unknown address type.");
 				return -EINVAL;
-			}
+
 			offset_to_segment += offset_in_segment;
 			nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
 				"offset_to_segment 0x%#08x",
 				offset_to_segment);
 			err = gr_gk20a_process_context_buffer_priv_segment(g,
 							   addr_type, addr,
 							   i, num_tpcs,
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -849,6 +849,9 @@ int gr_gk20a_create_priv_addr_table(struct gk20a *g,
 void gr_gk20a_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 	u32 num_fbpas,
 	u32 *priv_addr_table, u32 *t);
 /*
  * Report, through __offset_in_segment, where the registers of addr_type
  * start inside a gpccs segment. Returns 0 on success, or -EINVAL when
  * addr_type is not a recognised CTXSW_ADDR_TYPE_* value.
  */
 int gr_gk20a_get_offset_in_gpccs_segment(struct gk20a *g,
 	int addr_type, u32 num_tpcs, u32 num_ppcs,
 	u32 reg_list_ppc_count, u32 *__offset_in_segment);
 void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
 	struct gr_ctx_buffer_desc *desc);
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -330,6 +330,8 @@ static const struct gpu_ops gm20b_ops = {
 		.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -401,6 +401,8 @@ static const struct gpu_ops gp106_ops = {
 		.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = gp106_fb_reset,
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -364,6 +364,8 @@ static const struct gpu_ops gp10b_ops = {
 		.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -451,6 +451,8 @@ static const struct gpu_ops gv100_ops = {
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = gv100_fb_reset,
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -418,6 +418,8 @@ static const struct gpu_ops gv11b_ops = {
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = gv11b_fb_reset,
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -235,6 +235,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -272,6 +272,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers,
 		.map_global_ctx_buffers = gr_gk20a_map_global_ctx_buffers,
 		.commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers,
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 	},
 	.fb = {
 		.reset = gv11b_fb_reset,