diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h index 55e664494..ae46f2111 100644 --- a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -49,6 +49,7 @@ u32 ga10b_ctxsw_prog_get_compute_tpcreglist_offset(u32 *gpccs_hdr, u32 tpc_num); u32 ga10b_ctxsw_prog_get_gfx_tpcreglist_offset(u32 *gpccs_hdr, u32 tpc_num); u32 ga10b_ctxsw_prog_get_compute_etpcreglist_offset(u32 *gpccs_hdr); u32 ga10b_ctxsw_prog_get_gfx_etpcreglist_offset(u32 *gpccs_hdr); +u32 ga10b_ctxsw_prog_get_tpc_segment_pri_layout(struct gk20a *g, u32 *main_hdr); #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_DEBUG_FS void ga10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c index 8bd38528a..d2751115d 100644 --- a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_ga10b_fusa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -45,6 +45,13 @@ u32 ga10b_ctxsw_prog_hw_get_gpccs_header_stride(void) return ctxsw_prog_gpccs_header_stride_v(); } +u32 ga10b_ctxsw_prog_get_tpc_segment_pri_layout(struct gk20a *g, u32 *main_hdr) +{ + return ctxsw_prog_main_tpc_segment_pri_layout_v_v( + main_hdr[ctxsw_prog_main_tpc_segment_pri_layout_o() >> + BYTE_TO_DW_SHIFT]); +} + u32 ga10b_ctxsw_prog_get_compute_sysreglist_offset(u32 *fecs_hdr) { return ctxsw_prog_local_sys_reglist_offset_compute_v( diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c index 342916a5f..00a0b807a 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_ga10b.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -57,6 +58,7 @@ #include #include +#include #define ILLEGAL_ID ~U32(0U) @@ -612,6 +614,10 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE); u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); + struct nvgpu_gr *gr; + u32 *context_buffer; + u32 tpc_segment_pri_layout; + bool is_tpc_layout_interleaved = false; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr); @@ -619,6 +625,14 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, return -EINVAL; } + gr = nvgpu_gr_get_cur_instance_ptr(g); + context_buffer = nvgpu_gr_obj_ctx_get_local_golden_image_ptr( + gr->golden_image); + tpc_segment_pri_layout = g->ops.gr.ctxsw_prog.get_tpc_segment_pri_layout(g, context_buffer); + nvgpu_assert(tpc_segment_pri_layout != ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v()); + is_tpc_layout_interleaved = (tpc_segment_pri_layout == + 
ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v()); + /* Process the SYS/BE segment. */ if ((addr_type == CTXSW_ADDR_TYPE_SYS) || (addr_type == CTXSW_ADDR_TYPE_ROP)) { @@ -678,7 +692,12 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, tpc_in_gpc_base + (tpc_num * tpc_in_gpc_stride); address = base_address + tpc_addr; - tpc_offset = reg->index; + if (is_tpc_layout_interleaved) { + tpc_offset = (reg->index * num_tpcs) + + (tpc_num * 4U); + } else { + tpc_offset = reg->index; + } if (pri_addr == address) { *priv_offset = tpc_offset; @@ -696,7 +715,12 @@ int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g, tpc_in_gpc_base + (tpc_num * tpc_in_gpc_stride); address = base_address + tpc_addr; - tpc_offset = reg->index; + if (is_tpc_layout_interleaved) { + tpc_offset = (reg->index * num_tpcs) + + (tpc_num * 4U); + } else { + tpc_offset = reg->index; + } if (pri_addr == address) { *priv_offset = tpc_offset; @@ -886,6 +910,8 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, u32 segoffset, compute_segoffset; u32 graphics_segoffset; u32 main_hdr_size, fecs_hdr_size, gpccs_hdr_stride; + u32 tpc_segment_pri_layout; + bool is_tpc_layout_interleaved = false; err = g->ops.gr.decode_priv_addr(g, addr, &addr_type, &gpc_num, &tpc_num, &ppc_num, &be_num, @@ -907,6 +933,16 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, fecs_hdr_size = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(); gpccs_hdr_stride = g->ops.gr.ctxsw_prog.hw_get_gpccs_header_stride(); num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context); + /* + * Determine the layout of the TPC priv save segment. It can either + * be interleaved or migration. In case of interleaved, the registers + * will be sorted by address first followed by TPC number; migration + * layout does the exact opposite. 
+ */ + tpc_segment_pri_layout = g->ops.gr.ctxsw_prog.get_tpc_segment_pri_layout(g, context_buffer); + nvgpu_assert(tpc_segment_pri_layout != ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v()); + is_tpc_layout_interleaved = (tpc_segment_pri_layout == + ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v()); /* * Check in extended buffer segment of ctxsw buffer. If found, return @@ -1005,6 +1041,19 @@ int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr, graphics_segoffset = g->ops.gr.ctxsw_prog.get_gfx_ppcreglist_offset(context); } else if (addr_type == CTXSW_ADDR_TYPE_TPC) { + /* + * In case of interleaved TPC layout, all TPC registers will be + * saved contiguously starting from TPC0 segment address, + * whereas, in migration layout, registers of each TPC will + * be stored in separate segments based on the tpc number. + * Hence, for interleaved layout the segment start address will + * be a constant for all TPC registers i.e. the segment address + * of TPC0. + */ + if (is_tpc_layout_interleaved) { + tpc_num = 0; + + } compute_segoffset = g->ops.gr.ctxsw_prog.get_compute_tpcreglist_offset(context, tpc_num); graphics_segoffset = diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c index 6e00bc0bf..c24e55152 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c @@ -485,6 +485,7 @@ static const struct gops_gr_ctxsw_prog ga10b_ops_gr_ctxsw_prog = { .get_gfx_ppcreglist_offset = ga10b_ctxsw_prog_get_gfx_ppcreglist_offset, .get_compute_etpcreglist_offset = ga10b_ctxsw_prog_get_compute_etpcreglist_offset, .get_gfx_etpcreglist_offset = ga10b_ctxsw_prog_get_gfx_etpcreglist_offset, + .get_tpc_segment_pri_layout = ga10b_ctxsw_prog_get_tpc_segment_pri_layout, #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_DEBUG_FS .dump_ctxsw_stats = ga10b_ctxsw_prog_dump_ctxsw_stats, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h index 
a34ab75ce..993c2ce03 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h @@ -1050,6 +1050,7 @@ struct gops_gr_ctxsw_prog { u32 (*get_gfx_ppcreglist_offset)(u32 *gpccs_hdr); u32 (*get_compute_etpcreglist_offset)(u32 *gpccs_hdr); u32 (*get_gfx_etpcreglist_offset)(u32 *gpccs_hdr); + u32 (*get_tpc_segment_pri_layout)(struct gk20a *g, u32 *main_hdr); #endif #endif }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h index 849a32e00..c21b2afd1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_ctxsw_prog_ga10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -87,6 +87,11 @@ #define ctxsw_prog_main_image_num_gfxp_save_ops_o() (0x000000d8U) #define ctxsw_prog_main_image_num_cilp_save_ops_o() (0x000000dcU) #define ctxsw_prog_main_image_num_restore_ops_o() (0x000000f8U) +#define ctxsw_prog_main_tpc_segment_pri_layout_o() (0x000000e4U) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_v(r)\ + (((r) >> 0U) & 0xffffffffU) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_invalid_v() (0x00000000U) +#define ctxsw_prog_main_tpc_segment_pri_layout_v_interleaved_v() (0x00000002U) #define ctxsw_prog_main_image_magic_value_1_o() (0x000000fcU) #define ctxsw_prog_main_image_magic_value_1_v_value_v() (0x600dc0deU) #define ctxsw_prog_main_image_magic_value_2_o() (0x00000100U)