From 94255220f75441243e0f49fd34b414f65af34974 Mon Sep 17 00:00:00 2001 From: Antony Clince Alex Date: Tue, 8 Feb 2022 06:17:22 +0000 Subject: [PATCH] gpu: nvgpu: ga10b: add TPC interleaved priv segment support The ctxsw ucode saves all the ctxsw'ed TPC priv registers in the TPC priv segment of the ctxsw image. In ga10b, these registers can be stored in either of the two arrangements: - INTERLEAVED: means the format is sorted by address first, then by TPC number - MIGRATION: exact opposite of interleaved. Update HAL functions gr_ga10b_process_context_buffer_priv_segment, gr_ga10b_find_priv_offset_in_buffer to detect the register layout and calculate the register offset accordingly. Bug 200737000 Bug 3532165 Change-Id: I305509cf89498cb0c2c5bfa1d867272bdf5f42b3 Signed-off-by: Antony Clince Alex Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2665491 Tested-by: mobile promotions Reviewed-by: mobile promotions --- .../hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h | 3 +- .../hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c | 9 +++- drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c | 53 ++++++++++++++++++- drivers/gpu/nvgpu/hal/init/hal_ga10b.c | 1 + drivers/gpu/nvgpu/include/nvgpu/gops/gr.h | 1 + .../nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h | 7 ++- 6 files changed, 69 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h index 55e664494..ae46f2111 100644 --- a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -49,6 +49,7 @@ u32 ga10b_ctxsw_prog_get_compute_tpcreglist_offset(u32 *gpccs_hdr, u32 tpc_num); u32 ga10b_ctxsw_prog_get_gfx_tpcreglist_offset(u32 *gpccs_hdr, u32 tpc_num); u32 ga10b_ctxsw_prog_get_compute_etpcreglist_offset(u32 *gpccs_hdr); u32 ga10b_ctxsw_prog_get_gfx_etpcreglist_offset(u32 *gpccs_hdr); +u32 ga10b_ctxsw_prog_get_tpc_segment_pri_layout(struct gk20a *g, u32 *main_hdr); #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_DEBUG_FS void ga10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c index 8bd38528a..d2751115d 100644 --- a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -45,6 +45,13 @@ u32 ga10b_ctxsw_prog_hw_get_gpccs_header_stride(void) return ctxsw_prog_gpccs_header_stride_v(); } +u32 ga10b_ctxsw_prog_get_tpc_segment_pri_layout(struct gk20a *g, u32 *main_hdr) +{ + return ctxsw_prog_main_tpc_segment_pri_layout_v_v( + main_hdr[ctxsw_prog_main_tpc_segment_pri_layout_o() >> + BYTE_TO_DW_SHIFT]); +} + u32 ga10b_ctxsw_prog_get_compute_sysreglist_offset(u32 *fecs_hdr) { return ctxsw_prog_local_sys_reglist_offset_compute_v( diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c index 342916a5f..00a0b807a 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -57,6 +58,7 @@ #include #include +#include #define ILLEGAL_ID ~U32(0U) @@ -612,6 +614,10 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + struct nvgpu_gr *gr; + u32 *context_buffer; + u32 tpc_segment_pri_layout; + bool is_tpc_layout_interleaved = false; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); @@ -619,6 +625,14 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, return -EINVAL; } + gr = nvgpu_gr_get_cur_instance_ptr(g); + context_buffer = nvgpu_gr_obj_ctx_get_local_golden_image_ptr( + gr->golden_image); + tpc_segment_pri_layout = g->ops.gr.ctxsw_prog.get_tpc_segment_pri_layout(g, context_buffer); + nvgpu_assert(tpc_segment_pri_layout != ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v()); + is_tpc_layout_interleaved = (tpc_segment_pri_layout == + 
ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v()); + /* Process the SYS/BE segment. */ if ((addr_type == CTXSW_ADDR_TYPE_SYS) || (addr_type == CTXSW_ADDR_TYPE_ROP)) { @@ -678,7 +692,12 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, tpc_in_gpc_base + (tpc_num * tpc_in_gpc_stride); address = base_address + tpc_addr; - tpc_offset = reg->index; + if (is_tpc_layout_interleaved) { + tpc_offset = (reg->index * num_tpcs) + + (tpc_num * 4U); + } else { + tpc_offset = reg->index; + } if (pri_addr == address) { *priv_offset = tpc_offset; @@ -696,7 +715,12 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, tpc_in_gpc_base + (tpc_num * tpc_in_gpc_stride); address = base_address + tpc_addr; - tpc_offset = reg->index; + if (is_tpc_layout_interleaved) { + tpc_offset = (reg->index * num_tpcs) + + (tpc_num * 4U); + } else { + tpc_offset = reg->index; + } if (pri_addr == address) { *priv_offset = tpc_offset; @@ -886,6 +910,8 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, u32 segoffset, compute_segoffset; u32 graphics_segoffset; u32 main_hdr_size, fecs_hdr_size, gpccs_hdr_stride; + u32 tpc_segment_pri_layout; + bool is_tpc_layout_interleaved = false; err = g->ops.gr.decode_priv_addr(g, addr, &addr_type, &gpc_num, &tpc_num, &ppc_num, &be_num, @@ -907,6 +933,16 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, fecs_hdr_size = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(); gpccs_hdr_stride = g->ops.gr.ctxsw_prog.hw_get_gpccs_header_stride(); num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context); + /* + * Determine the layout of the TPC priv save segment. It can either + * be interleaved or migration. In case of interleaved, the registers + * will be sorted by address first followed by TPC number, migration + * layout does the exact opposite. 
+ */ + tpc_segment_pri_layout = g->ops.gr.ctxsw_prog.get_tpc_segment_pri_layout(g, context_buffer); + nvgpu_assert(tpc_segment_pri_layout != ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v()); + is_tpc_layout_interleaved = (tpc_segment_pri_layout == + ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v()); /* * Check in extended buffer segment of ctxsw buffer. If found, return @@ -1005,6 +1041,19 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, graphics_segoffset = g->ops.gr.ctxsw_prog.get_gfx_ppcreglist_offset(context); } else if (addr_type == CTXSW_ADDR_TYPE_TPC) { + /* + * In case of interleaved TPC layout, all TPC registers will be + * saved contiguously starting from TPC0 segment address, + * whereas, in migration layout, registers of each TPC will + * be stored in separate segments based on the TPC number. + * Hence, for interleaved layout the segment start address will + * be a constant for all TPC registers i.e. the segment address + * of TPC0. + */ + if (is_tpc_layout_interleaved) { + tpc_num = 0; + + } compute_segoffset = g->ops.gr.ctxsw_prog.get_compute_tpcreglist_offset(context, tpc_num); graphics_segoffset = diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index 6e00bc0bf..c24e55152 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -485,6 +485,7 @@ static const struct gops_gr_ctxsw_prog ga10b_ops_gr_ctxsw_prog = { .get_gfx_ppcreglist_offset = ga10b_ctxsw_prog_get_gfx_ppcreglist_offset, .get_compute_etpcreglist_offset = ga10b_ctxsw_prog_get_compute_etpcreglist_offset, .get_gfx_etpcreglist_offset = ga10b_ctxsw_prog_get_gfx_etpcreglist_offset, + .get_tpc_segment_pri_layout = ga10b_ctxsw_prog_get_tpc_segment_pri_layout, #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_DEBUG_FS .dump_ctxsw_stats = ga10b_ctxsw_prog_dump_ctxsw_stats, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 
a34ab75ce..993c2ce03 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -1050,6 +1050,7 @@ struct gops_gr_ctxsw_prog { u32 (*get_gfx_ppcreglist_offset)(u32 *gpccs_hdr); u32 (*get_compute_etpcreglist_offset)(u32 *gpccs_hdr); u32 (*get_gfx_etpcreglist_offset)(u32 *gpccs_hdr); + u32 (*get_tpc_segment_pri_layout)(struct gk20a *g, u32 *main_hdr); #endif #endif }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h index 849a32e00..c21b2afd1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -87,6 +87,11 @@ #define ctxsw_prog_main_image_num_gfxp_save_ops_o() (0x000000d8U) #define ctxsw_prog_main_image_num_cilp_save_ops_o() (0x000000dcU) #define ctxsw_prog_main_image_num_restore_ops_o() (0x000000f8U) +#define ctxsw_prog_main_tpc_segment_pri_layout_o() (0x000000e4U) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_v(r)\ + (((r) >> 0U) & 0xffffffffU) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v() (0x00000000U) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v() (0x00000002U) #define ctxsw_prog_main_image_magic_value_1_o() (0x000000fcU) #define ctxsw_prog_main_image_magic_value_1_v_value_v() (0x600dc0deU) #define ctxsw_prog_main_image_magic_value_2_o() (0x00000100U)