diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 96ff609b0..b2f20939b 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -68,6 +68,9 @@ nvgpu-y += common/bus/bus_gk20a.o \ common/ltc/ltc_gp10b.o \ common/ltc/ltc_gv11b.o \ common/ltc/ltc_tu104.o \ + common/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ + common/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ + common/gr/ctxsw_prog/ctxsw_prog_gv11b.o \ common/netlist/netlist.o \ common/netlist/netlist_sim.o \ common/netlist/netlist_gm20b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 1a1590d9b..471ff5fe5 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -106,6 +106,9 @@ srcs := os/posix/nvgpu.c \ common/falcon/falcon_gp106.c \ common/falcon/falcon_gv100.c \ common/falcon/falcon_tu104.c \ + common/gr/ctxsw_prog/ctxsw_prog_gm20b.c \ + common/gr/ctxsw_prog/ctxsw_prog_gp10b.c \ + common/gr/ctxsw_prog/ctxsw_prog_gv11b.c \ common/netlist/netlist.c \ common/netlist/netlist_sim.c \ common/netlist/netlist_gm20b.c \ diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c new file mode 100644 index 000000000..da66a1b7d --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "ctxsw_prog_gm20b.h" + +#include + +u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void) +{ + return ctxsw_prog_fecs_header_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void) +{ + return ctxsw_prog_gpccs_header_stride_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void) +{ + return ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); +} + +u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void) +{ + return ctxsw_prog_extended_marker_size_in_bytes_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void) +{ + return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); +} + +u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o()); +} + +u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem) +{ + return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_patch_count_o()); +} + +void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 count) +{ + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_count_o(), count); +} + +void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_patch_adr_lo_o(), u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_patch_adr_hi_o(), u64_hi32(addr)); +} + +void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(), + u64_lo32(addr)); +} + +void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u32 mode) +{ + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_o(), mode); +} + +void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, 
ctxsw_prog_main_image_zcull_o(), + ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); +} + +bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode) +{ + return mode == ctxsw_prog_main_image_zcull_mode_separate_buffer_v(); +} + +void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(), + u64_lo32(addr)); +} + +void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 mode) +{ + u32 data; + + data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data = data & ~ctxsw_prog_main_image_pm_mode_m(); + data |= mode; + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable) +{ + u32 data; + + data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); + data |= enable ? + ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : + ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, + ctxsw_prog_main_image_pm_mode_no_ctxsw_f()); + return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); +} + +u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, + ctxsw_prog_main_image_pm_mode_ctxsw_f()); + return ctxsw_prog_main_image_pm_mode_ctxsw_f(); +} + + +u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void) +{ + return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); +} + +u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void) +{ + return ctxsw_prog_main_image_pm_mode_ctxsw_f(); +} + +void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + 
ctxsw_prog_main_image_num_save_ops_o(), 0); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_num_restore_ops_o(), 0); +} + +void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_preemption_options_o(), + ctxsw_prog_main_image_preemption_options_control_cta_enabled_f()); +} + +void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o()); + + data |= ctxsw_prog_main_image_ctl_cde_enabled_f(); + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data); +} + +void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable) +{ + u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); + data |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool allow_all) +{ + if (allow_all) { + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_priv_access_map_config_o(), + ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f()); + } else { + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_priv_access_map_config_o(), + ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f()); + } +} + +void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_priv_access_map_addr_lo_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_priv_access_map_addr_hi_o(), + u64_hi32(addr)); +} + +void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + u32 data; + + data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_misc_options_o()); + + data = data & 
~ctxsw_prog_main_image_misc_options_verif_features_m(); + data = data | ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_misc_options_o(), data); +} + +bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context) +{ + u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o()); + return magic == ctxsw_prog_main_image_magic_value_v_value_v(); +} + +bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context) +{ + u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o()); + return magic == ctxsw_prog_local_magic_value_v_value_v(); +} + +u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context) +{ + return *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); +} + +u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context) +{ + return *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); +} + +void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context, + u32 *size, u32 *offset) +{ + u32 data = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o()); + + *size = ctxsw_prog_main_extended_buffer_ctl_size_v(data); + *offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data); +} + +void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask) +{ + u32 data = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); + + *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data); + *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data); +} + +u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context) +{ + u32 data = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); + return ctxsw_prog_local_priv_register_ctl_offset_v(data); +} + +u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void) +{ + return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts) +{ + return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); +} + +u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts) +{ + 
return ts & + ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); +} + +u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void) +{ + return ctxsw_prog_record_timestamp_record_size_in_bytes_v(); +} + +u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi) +{ + return magic_hi == + ctxsw_prog_record_timestamp_magic_value_hi_v_value_v(); +} + +u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + return nvgpu_aperture_mask(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); +} + +void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 num) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_control_o(), + ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(num)); +} + +void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), + ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u64_hi32(addr)) | + aperture_mask); +} diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h new file mode 100644 index 000000000..fc2be9728 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CTXSW_PROG_GM20B_H +#define NVGPU_CTXSW_PROG_GM20B_H + +#include + +struct gk20a; +struct nvgpu_mem; + +u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void); +u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void); +u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void); +u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void); +u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void); +u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem); +u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem); +void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 count); +void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr); +void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u32 mode); +void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode); +void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr); +void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 mode); +void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable); +u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void); +u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void); +void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void 
gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable); +void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool allow_all); +void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context); +bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context); +u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context); +u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context); +void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context, + u32 *size, u32 *offset); +void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask); +u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context); +u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void); +u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts); +u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts); +u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void); +u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi); +u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 num); +void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask); + +#endif /* NVGPU_CTXSW_PROG_GM20B_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.c b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.c new file mode 100644 index 000000000..4128a2e07 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include + +#include "ctxsw_prog_gm20b.h" +#include "ctxsw_prog_gp10b.h" + +#include + +void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_graphics_preemption_options_o(), + ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f()); +} + +void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_compute_preemption_options_o(), + ctxsw_prog_main_image_compute_preemption_options_control_cta_f()); +} + +void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_compute_preemption_options_o(), + ctxsw_prog_main_image_compute_preemption_options_control_cilp_f()); +} + +void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 boosted_ctx) +{ + u32 data = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(boosted_ctx); + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pmu_options_o(), data); +} + +void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_full_preemption_ptr_o(), u64_lo32(addr)); +} + +void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_num_wfi_save_ops_o(), 0); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_num_cta_save_ops_o(), 0); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_num_cilp_save_ops_o(), 0); + + gm20b_ctxsw_prog_init_ctxsw_hdr_data(g, ctx_mem); +} + +void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_err(g, 
"ctxsw_prog_main_image_magic_value_o : %x (expect %x)", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_magic_value_o()), + ctxsw_prog_main_image_magic_value_v_value_v()); + + nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o())); + + nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_ptr_o())); + + nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_context_timestamp_buffer_control_o())); + + nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_num_save_ops_o())); + nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_num_wfi_save_ops_o())); + nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_num_cta_save_ops_o())); + nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_num_gfxp_save_ops_o())); + nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_num_cilp_save_ops_o())); + nvgpu_err(g, + "image gfx preemption option (GFXP is 1) %x", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_graphics_preemption_options_o())); + nvgpu_err(g, + "image compute preemption option (CTA is 1) %x", + nvgpu_mem_rd(g, ctx_mem, + ctxsw_prog_main_image_compute_preemption_options_o())); +} diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.h new file mode 100644 index 000000000..acac22a9f --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CTXSW_PROG_GP10B_H +#define NVGPU_CTXSW_PROG_GP10B_H + +#include + +struct gk20a; +struct nvgpu_mem; + +void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 boosted_ctx); +void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + +#endif /* NVGPU_CTXSW_PROG_GP10B_H */ diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c new file mode 100644 index 000000000..aecc9cd4d --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "ctxsw_prog_gm20b.h" +#include "ctxsw_prog_gv11b.h" + +#include + +void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_hi_o(), + u64_hi32(addr)); +} + +void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_hi_o(), + u64_hi32(addr)); +} + +u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void) +{ + return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); +} + +u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, + ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f()); + return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); +} + +void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_full_preemption_ptr_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_full_preemption_ptr_hi_o(), + u64_hi32(addr)); +} + +void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 
addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), + u64_lo32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(), + u64_hi32(addr)); +} + +u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void) +{ + return ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); +} + +void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_context_buffer_ptr_hi_o(), + u64_hi32(addr)); + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_context_buffer_ptr_o(), + u64_lo32(addr)); +} + +void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + nvgpu_mem_wr(g, ctx_mem, + ctxsw_prog_main_image_ctl_o(), + ctxsw_prog_main_image_ctl_type_per_veid_header_v()); +} diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h new file mode 100644 index 000000000..852069979 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CTXSW_PROG_GV11B_H +#define NVGPU_CTXSW_PROG_GV11B_H + +#include + +void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr); +void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr); +u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void); +u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g, + struct nvgpu_mem *ctx_mem); +void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void); +void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); +void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + +#endif /* NVGPU_CTXSW_PROG_GV11B_H */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 08e9fa50e..e663a461d 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -43,7 +43,6 @@ #include #include -#include #include struct gk20a_fecs_trace_hash_ent { @@ -62,29 +61,14 @@ struct gk20a_fecs_trace { }; #ifdef CONFIG_GK20A_CTXSW_TRACE -u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void) -{ - return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v(); -} - -u32 
gk20a_fecs_trace_record_ts_tag_v(u64 ts) -{ - return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32)); -} - -u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts) -{ - return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32); -} - static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch) { return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); } -int gk20a_fecs_trace_num_ts(void) +int gk20a_fecs_trace_num_ts(struct gk20a *g) { - return (ctxsw_prog_record_timestamp_record_size_in_bytes_v() + return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes() - sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64); } @@ -94,18 +78,18 @@ struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record( struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem; return (struct gk20a_fecs_trace_record *) - ((u8 *) mem->cpu_va - + (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v())); + ((u8 *) mem->cpu_va + + (idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes())); } -bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r) +bool gk20a_fecs_trace_is_valid_record(struct gk20a *g, + struct gk20a_fecs_trace_record *r) { /* * testing magic_hi should suffice. magic_lo is sometimes used * as a sequence number in experimental ucode. 
*/ - return (r->magic_hi - == ctxsw_prog_record_timestamp_magic_value_hi_v_value_v()); + return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi); } int gk20a_fecs_trace_get_read_index(struct gk20a *g) @@ -254,7 +238,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "consuming record trace=%p read=%d record=%p", trace, index, r); - if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) { + if (unlikely(!gk20a_fecs_trace_is_valid_record(g, r))) { nvgpu_warn(g, "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)", trace, index, r, r->magic_lo, r->magic_hi); @@ -278,10 +262,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) entry.vmid = vmid; /* break out FECS record into trace events */ - for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { + for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) { - entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); - entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); + entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]); + entry.timestamp = + g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]); entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; nvgpu_log(g, gpu_dbg_ctxsw, @@ -402,7 +387,7 @@ static int gk20a_fecs_trace_periodic_polling(void *arg) size_t gk20a_fecs_trace_buffer_size(struct gk20a *g) { return GK20A_FECS_TRACE_NUM_RECORDS - * ctxsw_prog_record_timestamp_record_size_in_bytes_v(); + * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes(); } int gk20a_fecs_trace_init(struct gk20a *g) @@ -449,8 +434,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, * in the context header. 
*/ - u32 lo; - u32 hi; u64 addr; struct gk20a_fecs_trace *trace = g->fecs_trace; struct nvgpu_mem *mem; @@ -475,37 +458,24 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, } else { addr = nvgpu_inst_block_addr(g, mem); nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr); - aperture_mask = nvgpu_aperture_mask(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(), - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); + aperture_mask = + g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem); } if (!addr) return -ENOMEM; - lo = u64_lo32(addr); - hi = u64_hi32(addr); - mem = &gr_ctx->mem; - nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi, - lo, GK20A_FECS_TRACE_NUM_RECORDS); + nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr, + GK20A_FECS_TRACE_NUM_RECORDS); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_control_o(), - ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f( - GK20A_FECS_TRACE_NUM_RECORDS)); + g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem, + GK20A_FECS_TRACE_NUM_RECORDS); if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) mem = &ch->ctx_header; - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), - lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), - ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) | - aperture_mask); + g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask); /* pid (process identifier) in user space, corresponds to tgid (thread * group id) in kernel space. 
@@ -573,7 +543,7 @@ int gk20a_gr_max_entries(struct gk20a *g, int tag; /* Compute number of entries per record, with given filter */ - for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++) + for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(g); tag++) n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0); /* Return max number of entries generated for the whole ring */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 0466fa17a..ccb4fd93d 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -53,7 +53,6 @@ #include "gr_pri_gk20a.h" #include "regops_gk20a.h" -#include #include #include #include @@ -93,7 +92,7 @@ u32 gr_gk20a_get_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem) Flush and invalidate before cpu update. */ g->ops.mm.l2_flush(g, true); - ctx_id = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o()); + ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, ctx_mem); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", ctx_id); return ctx_id; } @@ -619,9 +618,8 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, { if (update_patch_count) { /* reset patch count if ucode has already processed it */ - gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, - &gr_ctx->mem, - ctxsw_prog_main_image_patch_count_o()); + gr_ctx->patch_ctx.data_count = + g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem); nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", gr_ctx->patch_ctx.data_count); } @@ -634,8 +632,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g, { /* Write context count to context image if it is mapped */ if (update_patch_count) { - nvgpu_mem_wr(g, &gr_ctx->mem, - ctxsw_prog_main_image_patch_count_o(), + g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem, gr_ctx->patch_ctx.data_count); nvgpu_log(g, gpu_dbg_info, "write patch count %d", gr_ctx->patch_ctx.data_count); @@ -710,24 +707,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g, return ret; } 
-void gr_gk20a_write_zcull_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va) -{ - u32 va = u64_lo32(gpu_va >> 8); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_zcull_ptr_o(), va); -} - -void gr_gk20a_write_pm_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va) -{ - u32 va = u64_lo32(gpu_va >> 8); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_pm_ptr_o(), va); -} - static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, struct nvgpu_gr_ctx *gr_ctx) { @@ -740,8 +719,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, mem = &gr_ctx->mem; if (gr_ctx->zcull_ctx.gpu_va == 0ULL && - gr_ctx->zcull_ctx.ctx_sw_mode == - ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { + g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer( + gr_ctx->zcull_ctx.ctx_sw_mode)) { return -EINVAL; } @@ -757,15 +736,14 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, return ret; } - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_zcull_o(), - gr_ctx->zcull_ctx.ctx_sw_mode); + g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode); if (ctxheader->gpu_va != 0ULL) { - g->ops.gr.write_zcull_ptr(g, ctxheader, - gr_ctx->zcull_ctx.gpu_va); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader, + gr_ctx->zcull_ctx.gpu_va); } else { - g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem, + gr_ctx->zcull_ctx.gpu_va); } gk20a_enable_channel_tsg(g, c); @@ -1302,7 +1280,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { struct gr_gk20a *gr = &g->gr; - u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); + u32 ctx_header_bytes = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(); u32 ctx_header_words; u32 i; u32 data; @@ -1497,10 +1475,9 @@ restore_fe_go_idle: data = nvgpu_mem_rd32(g, gr_mem, i); nvgpu_mem_wr32(g, gold_mem, i, data); } - nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), - 
ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); + g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, gold_mem); - g->ops.gr.write_zcull_ptr(g, gold_mem, 0); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, gold_mem, 0); err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); if (err != 0) { @@ -1554,7 +1531,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, struct tsg_gk20a *tsg; struct nvgpu_gr_ctx *gr_ctx = NULL; struct nvgpu_mem *mem = NULL; - u32 data; int ret; nvgpu_log_fn(g, " "); @@ -1587,16 +1563,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, Flush and invalidate before cpu update. */ g->ops.mm.l2_flush(g, true); - data = nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_pm_o()); - - data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); - data |= enable_smpc_ctxsw ? - ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : - ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_pm_o(), data); + g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable_smpc_ctxsw); out: gk20a_enable_channel_tsg(g, c); @@ -1612,7 +1579,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, struct nvgpu_mem *gr_mem = NULL; struct nvgpu_gr_ctx *gr_ctx; struct pm_ctx_desc *pm_ctx; - u32 data; u64 virt_addr = 0; struct nvgpu_mem *ctxheader = &c->ctx_header; int ret; @@ -1633,24 +1599,29 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && - (g->ops.gr.get_hw_accessor_stream_out_mode == NULL)) { - nvgpu_err(g, "Mode-E hwpm context switch mode is not supported"); + (g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == + NULL)) { + nvgpu_err(g, + "Mode-E hwpm context switch mode is not supported"); return -EINVAL; } switch (mode) { case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: - if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { return 0; } break; case 
NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: - if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { return 0; } break; case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: - if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { return 0; } break; @@ -1711,37 +1682,34 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } } - data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o()); - data = data & ~ctxsw_prog_main_image_pm_mode_m(); - switch (mode) { case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: - pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.set_pm_mode_ctxsw(g, gr_mem); virt_addr = pm_ctx->mem.gpu_va; break; case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: - pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.set_pm_mode_stream_out_ctxsw(g, gr_mem); virt_addr = pm_ctx->mem.gpu_va; break; case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: - pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.set_pm_mode_no_ctxsw(g, gr_mem); virt_addr = 0; } - data |= pm_ctx->pm_mode; - - nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data); - if (ctxheader->gpu_va != 0ULL) { struct channel_gk20a *ch; nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { - g->ops.gr.write_pm_ptr(g, &ch->ctx_header, virt_addr); + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &ch->ctx_header, + virt_addr); } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr); + g->ops.gr.ctxsw_prog.set_pm_ptr(g, gr_mem, virt_addr); } /* enable channel */ @@ -1750,26 +1718,13 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, return 0; } -void gk20a_gr_init_ctxsw_hdr_data(struct gk20a 
*g, - struct nvgpu_mem *mem) -{ - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_save_ops_o(), 0); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_restore_ops_o(), 0); -} - /* load saved fresh copy of gloden image into channel gr_ctx */ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, struct channel_gk20a *c, struct nvgpu_gr_ctx *gr_ctx) { struct gr_gk20a *gr = &g->gr; - u32 virt_addr_lo; - u32 virt_addr_hi; u64 virt_addr = 0; - u32 v, data; - int ret = 0; struct nvgpu_mem *mem; nvgpu_log_fn(g, " "); @@ -1787,8 +1742,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, gr->ctx_vars.local_golden_image, gr->ctx_vars.golden_image_size); - if (g->ops.gr.init_ctxsw_hdr_data != NULL) { - g->ops.gr.init_ctxsw_hdr_data(g, mem); + if (g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data != NULL) { + g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem); } if ((g->ops.gr.enable_cde_in_fecs != NULL) && c->cde) { @@ -1796,32 +1751,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, } /* set priv access map */ - virt_addr_lo = - u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); - virt_addr_hi = - u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); - - if (g->allow_all) { - data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); - } else { - data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f(); - } - - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), - data); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_priv_access_map_addr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_priv_access_map_addr_hi_o(), - virt_addr_hi); + g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem, + g->allow_all); + g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem, + gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); /* disable verif features */ - v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); - v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); - v = v | 
ctxsw_prog_main_image_misc_options_verif_features_disabled_f(); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); + g->ops.gr.ctxsw_prog.disable_verif_features(g, mem); if (g->ops.gr.update_ctxsw_preemption_mode != NULL) { g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx, &c->ctx_header); @@ -1831,26 +1767,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); } - virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); - virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); - nvgpu_log(g, gpu_dbg_info, "write patch count = %d", gr_ctx->patch_ctx.data_count); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), - gr_ctx->patch_ctx.data_count); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_lo_o(), - virt_addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_hi_o(), - virt_addr_hi); + g->ops.gr.ctxsw_prog.set_patch_count(g, mem, + gr_ctx->patch_ctx.data_count); + g->ops.gr.ctxsw_prog.set_patch_addr(g, mem, + gr_ctx->patch_ctx.mem.gpu_va); /* Update main header region of the context buffer with the info needed * for PM context switching, including mode and possibly a pointer to * the PM backing store. 
*/ - if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { + if (gr_ctx->pm_ctx.pm_mode != + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { if (gr_ctx->pm_ctx.mem.gpu_va == 0ULL) { nvgpu_err(g, "context switched pm with no pm buffer!"); @@ -1862,15 +1791,10 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, virt_addr = 0; } - data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); - data = data & ~ctxsw_prog_main_image_pm_mode_m(); - data |= gr_ctx->pm_ctx.pm_mode; + g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode); + g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); - - g->ops.gr.write_pm_ptr(g, mem, virt_addr); - - return ret; + return 0; } static void gr_gk20a_start_falcon_ucode(struct gk20a *g) @@ -2959,7 +2883,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) } /* PM ctxt switch is off by default */ - gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + gr_ctx->pm_ctx.pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); } else { /* commit gr ctx buffer */ err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); @@ -6654,8 +6579,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, u32 num_tpc; u32 tpc, gpc, reg; u32 chk_addr; - u32 vaddr_lo; - u32 vaddr_hi; u32 tmp; u32 num_ovr_perf_regs = 0; u32 *ovr_perf_regs = NULL; @@ -6682,8 +6605,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, /* reset the patch count from previous runs,if ucode has already processed it */ - tmp = nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_patch_count_o()); + tmp = g->ops.gr.ctxsw_prog.get_patch_count(g, + mem); if (tmp == 0U) { gr_ctx->patch_ctx.data_count = 0; @@ -6692,26 +6615,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, gr_gk20a_ctx_patch_write(g, gr_ctx, addr, data, true); - vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); - vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); + 
g->ops.gr.ctxsw_prog.set_patch_count(g, mem, + gr_ctx->patch_ctx.data_count); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_count_o(), - gr_ctx->patch_ctx.data_count); if (ctxheader->gpu_va != 0ULL) { - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_lo_o(), - vaddr_lo); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_hi_o(), - vaddr_hi); + g->ops.gr.ctxsw_prog.set_patch_addr(g, + ctxheader, + gr_ctx->patch_ctx.mem.gpu_va); } else { - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_lo_o(), - vaddr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_patch_adr_hi_o(), - vaddr_hi); + g->ops.gr.ctxsw_prog.set_patch_addr(g, + mem, + gr_ctx->patch_ctx.mem.gpu_va); } /* we're not caching these on cpu side, @@ -6726,24 +6640,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, #define ILLEGAL_ID ((u32)~0) -static inline bool check_main_image_header_magic(u8 *context) -{ - u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o()); - return magic == ctxsw_prog_main_image_magic_value_v_value_v(); -} -static inline bool check_local_header_magic(u8 *context) -{ - u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o()); - return magic == ctxsw_prog_local_magic_value_v_value_v(); - -} - -/* most likely dupe of ctxsw_gpccs_header__size_1_v() */ -static inline u32 ctxsw_prog_ucode_header_size_in_bytes(void) -{ - return 256U; -} - void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, u32 **ovr_perf_regs) { @@ -6758,9 +6654,9 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, u32 context_buffer_size, u32 *priv_offset) { - u32 i, data32; + u32 i; u32 gpc_num, tpc_num; - u32 num_gpcs, num_tpcs; + u32 num_gpcs; u32 chk_addr; u32 ext_priv_offset, ext_priv_size; u8 *context; @@ -6809,18 +6705,18 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, return -EINVAL; } - buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); + buffer_segments_size 
= g->ops.gr.ctxsw_prog.hw_get_extended_buffer_segments_size_in_bytes(); /* note below is in words/num_registers */ - marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2; + marker_size = g->ops.gr.ctxsw_prog.hw_extended_marker_size_in_bytes() >> 2; context = (u8 *)context_buffer; /* sanity check main header */ - if (!check_main_image_header_magic(context)) { + if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) { nvgpu_err(g, "Invalid main header: magic value"); return -EINVAL; } - num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); + num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context); if (gpc_num >= num_gpcs) { nvgpu_err(g, "GPC 0x%08x is greater than total count 0x%08x!", @@ -6828,21 +6724,20 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, return -EINVAL; } - data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o()); - ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32); + g->ops.gr.ctxsw_prog.get_extended_buffer_size_offset(context, + &ext_priv_size, &ext_priv_offset); if (0U == ext_priv_size) { nvgpu_log_info(g, " No extended memory in context buffer"); return -EINVAL; } - ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32); - offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes(); + offset_to_segment = ext_priv_offset * 256U; offset_to_segment_end = offset_to_segment + (ext_priv_size * buffer_segments_size); /* check local header magic */ - context += ctxsw_prog_ucode_header_size_in_bytes(); - if (!check_local_header_magic(context)) { + context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(); + if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) { nvgpu_err(g, "Invalid local header: magic value"); return -EINVAL; @@ -6937,8 +6832,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, offset_to_segment += (num_ext_gpccs_ext_buffer_segments * buffer_segments_size * gpc_num); - num_tpcs = 
g->gr.gpc_tpc_count[gpc_num]; - /* skip the head marker to start with */ inter_seg_offset = marker_size; @@ -6949,23 +6842,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, (tpc_num * control_register_stride) + sm_dsm_perf_ctrl_reg_id; } else { - /* skip all the control registers */ - inter_seg_offset = inter_seg_offset + - (num_tpcs * control_register_stride); - - /* skip the marker between control and counter segments */ - inter_seg_offset += marker_size; - - /* skip over counter regs of TPCs before the one we want */ - inter_seg_offset = inter_seg_offset + - (tpc_num * perf_register_stride) * - ctxsw_prog_extended_num_smpc_quadrants_v(); - - /* skip over the register for the quadrants we do not want. - * then skip to the register in this tpc */ - inter_seg_offset = inter_seg_offset + - (perf_register_stride * quad) + - sm_dsm_perf_reg_id; + return -EINVAL; } /* set the offset to the segment offset plus the inter segment offset to @@ -7146,7 +7023,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, u32 *num_ppcs, u32 *ppc_mask, u32 *reg_ppc_count) { - u32 data32; u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC); /* @@ -7159,11 +7035,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g, return -EINVAL; } - data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o()); - - *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32); - *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32); - + g->ops.gr.ctxsw_prog.get_ppc_info(context, num_ppcs, ppc_mask); *reg_ppc_count = g->netlist_vars->ctxsw_regs.ppc.count; return 0; @@ -7242,7 +7114,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, u32 context_buffer_size, u32 *priv_offset) { - u32 i, data32; + u32 i; int err; enum ctxsw_addr_type addr_type; u32 broadcast_flags; @@ -7267,22 +7139,23 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, } context = (u8 *)context_buffer; - if 
(!check_main_image_header_magic(context)) { + if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) { nvgpu_err(g, "Invalid main header: magic value"); return -EINVAL; } - num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o()); + num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context); /* Parse the FECS local header. */ - context += ctxsw_prog_ucode_header_size_in_bytes(); - if (!check_local_header_magic(context)) { + context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(); + if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) { nvgpu_err(g, "Invalid FECS local header: magic value"); return -EINVAL; } - data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); - sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); + + sys_priv_offset = + g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context); nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset); /* If found in Ext buffer, ok. @@ -7302,8 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, if ((addr_type == CTXSW_ADDR_TYPE_SYS) || (addr_type == CTXSW_ADDR_TYPE_BE)) { /* Find the offset in the FECS segment. 
*/ - offset_to_segment = sys_priv_offset * - ctxsw_prog_ucode_header_size_in_bytes(); + offset_to_segment = sys_priv_offset * 256U; err = gr_gk20a_process_context_buffer_priv_segment(g, addr_type, addr, @@ -7326,15 +7198,14 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, /* Parse the GPCCS local header(s).*/ for (i = 0; i < num_gpcs; i++) { - context += ctxsw_prog_ucode_header_size_in_bytes(); - if (!check_local_header_magic(context)) { + context += g->ops.gr.ctxsw_prog.hw_get_gpccs_header_size(); + if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) { nvgpu_err(g, "Invalid GPCCS local header: magic value"); return -EINVAL; } - data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o()); - gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32); + gpc_priv_offset = g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context); err = gr_gk20a_determine_ppc_configuration(g, context, &num_ppcs, &ppc_mask, @@ -7345,7 +7216,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, } - num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o()); + num_tpcs = g->ops.gr.ctxsw_prog.get_num_tpcs(context); if ((i == gpc_num) && ((tpc_num + 1U) > num_tpcs)) { nvgpu_err(g, @@ -7359,8 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "gpc_priv_offset 0x%#08x", gpc_priv_offset); - offset_to_segment = gpc_priv_offset * - ctxsw_prog_ucode_header_size_in_bytes(); + offset_to_segment = gpc_priv_offset * 256U; err = g->ops.gr.get_offset_in_gpccs_segment(g, addr_type, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 41427f501..34b7a4e66 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -738,12 +738,6 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g); int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va); -void gr_gk20a_write_zcull_ptr(struct gk20a *g, - 
struct nvgpu_mem *mem, u64 gpu_va); - -void gr_gk20a_write_pm_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va); - u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc); u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc); void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, @@ -751,8 +745,6 @@ void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, void gk20a_gr_init_ovr_sm_dsm_perf(void); void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, u32 **ovr_perf_regs); -void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g, - struct nvgpu_mem *mem); u32 gr_gk20a_get_patch_slots(struct gk20a *g); int gk20a_gr_handle_notify_pending(struct gk20a *g, struct gr_gk20a_isr_data *isr_data); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 02c0a06df..61682a38c 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -39,7 +39,6 @@ #include #include #include -#include #include void gr_gm20b_init_gpc_mmu(struct gk20a *g) @@ -537,7 +536,7 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; *ctrl_register_stride = - ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); + g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride(); } u32 gr_gm20b_get_gpc_mask(struct gk20a *g) @@ -908,16 +907,11 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g, void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader) { - u32 cta_preempt_option = - ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); - nvgpu_log_fn(g, " "); if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { - nvgpu_log_info(g, "CTA: %x", cta_preempt_option); - nvgpu_mem_wr(g, &gr_ctx->mem, - ctxsw_prog_main_image_preemption_options_o(), - cta_preempt_option); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, + &gr_ctx->mem); } nvgpu_log_fn(g, 
"done"); @@ -1069,7 +1063,6 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, struct tsg_gk20a *tsg; struct nvgpu_gr_ctx *gr_ctx; struct nvgpu_mem *mem; - u32 v; nvgpu_log_fn(c->g, " "); @@ -1084,11 +1077,7 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, return -EINVAL; } - - v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o()); - v &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); - v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable); - nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v); + c->g->ops.gr.ctxsw_prog.set_pc_sampling(c->g, mem, enable); nvgpu_log_fn(c->g, "done"); @@ -1176,11 +1165,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g) void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem) { - u32 cde_v; - - cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o()); - cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f(); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v); + g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem); } void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state) diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 3ff567165..f3b0fc9f1 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -41,6 +41,7 @@ #include "common/ptimer/ptimer_gk20a.h" #include "common/fb/fb_gm20b.h" #include "common/netlist/netlist_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/ltc/ltc_gm20b.h" #include "common/fuse/fuse_gm20b.h" @@ -288,8 +289,6 @@ static const struct gpu_ops gm20b_ops = { .program_zcull_mapping = gr_gk20a_program_zcull_mapping, .commit_global_timeslice = gr_gk20a_commit_global_timeslice, .commit_inst = gr_gk20a_commit_inst, - .write_zcull_ptr = gr_gk20a_write_zcull_ptr, - .write_pm_ptr = gr_gk20a_write_pm_ptr, .load_tpc_mask = gr_gm20b_load_tpc_mask, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = 
gr_gk20a_wait_for_pause, @@ -312,7 +311,6 @@ static const struct gpu_ops gm20b_ops = { .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, .disable_rd_coalesce = gm20a_gr_disable_rd_coalesce, - .init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data, .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, @@ -335,6 +333,72 @@ static const struct gpu_ops gm20b_ops = { gk20a_gr_get_fecs_ctx_state_store_major_rev_id, .alloc_gfxp_rtv_cb = NULL, .commit_gfxp_rtv_cb = NULL, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = 
gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .init_ctxsw_hdr_data = gm20b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gm20b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + } }, .fb = { .init_hw = gm20b_fb_init_hw, @@ -695,6 +759,7 @@ int gm20b_init_hal(struct gk20a *g) gops->ltc = gm20b_ops.ltc; gops->ce2 = gm20b_ops.ce2; gops->gr = gm20b_ops.gr; + gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog; gops->fb = gm20b_ops.fb; gops->clock_gating = gm20b_ops.clock_gating; gops->fifo = gm20b_ops.fifo; diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c 
b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 412649f08..2103582a6 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -33,6 +33,8 @@ #include "common/fb/fb_gm20b.h" #include "common/fb/fb_gp106.h" #include "common/netlist/netlist_gp106.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/xve/xve_gp106.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp106.h" @@ -354,8 +356,6 @@ static const struct gpu_ops gp106_ops = { .program_zcull_mapping = gr_gk20a_program_zcull_mapping, .commit_global_timeslice = gr_gk20a_commit_global_timeslice, .commit_inst = gr_gk20a_commit_inst, - .write_zcull_ptr = gr_gk20a_write_zcull_ptr, - .write_pm_ptr = gr_gk20a_write_pm_ptr, .load_tpc_mask = gr_gm20b_load_tpc_mask, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -412,6 +412,81 @@ static const struct gpu_ops gp106_ops = { gk20a_gr_get_fecs_ctx_state_store_major_rev_id, .alloc_gfxp_rtv_cb = NULL, .commit_gfxp_rtv_cb = NULL, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + 
.is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = 
gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gp10b_ctxsw_prog_set_full_preemption_ptr, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = gm20b_fb_init_hw, @@ -848,6 +923,7 @@ int gp106_init_hal(struct gk20a *g) gops->ltc = gp106_ops.ltc; gops->ce2 = gp106_ops.ce2; gops->gr = gp106_ops.gr; + gops->gr.ctxsw_prog = gp106_ops.gr.ctxsw_prog; gops->fb = gp106_ops.fb; gops->clock_gating = gp106_ops.clock_gating; gops->fifo = gp106_ops.fifo; diff --git a/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c index 567411041..b872f8313 100644 --- a/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fecs_trace_gp10b.c @@ -28,7 +28,6 @@ #include "fecs_trace_gp10b.h" -#include #include #ifdef CONFIG_GK20A_CTXSW_TRACE diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index d90e4f102..c04a2295e 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -44,7 +44,6 @@ #include #include -#include #define GFXP_WFI_TIMEOUT_COUNT_DEFAULT 100000U @@ -1134,83 +1133,27 @@ fail_free_gk20a_ctx: void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) { - struct nvgpu_mem *mem = &gr_ctx->mem; - - nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_magic_value_o()), - ctxsw_prog_main_image_magic_value_v_value_v()); - - nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o())); - - nvgpu_err(g, 
"ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_ptr_o())); - - nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_context_timestamp_buffer_control_o())); - - nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_save_ops_o())); - nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_wfi_save_ops_o())); - nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_cta_save_ops_o())); - nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_gfxp_save_ops_o())); - nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_num_cilp_save_ops_o())); - nvgpu_err(g, - "image gfx preemption option (GFXP is 1) %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_graphics_preemption_options_o())); - nvgpu_err(g, - "image compute preemption option (CTA is 1) %x", - nvgpu_mem_rd(g, mem, - ctxsw_prog_main_image_compute_preemption_options_o())); + g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem); } void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader) { struct nvgpu_mem *mem = &gr_ctx->mem; - u32 gfxp_preempt_option = - ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); - u32 cilp_preempt_option = - ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(); - u32 cta_preempt_option = - ctxsw_prog_main_image_compute_preemption_options_control_cta_f(); int err; nvgpu_log_fn(g, " "); if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_graphics_preemption_options_o(), - gfxp_preempt_option); + 
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem); } if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { - nvgpu_log_info(g, "CILP: %x", cilp_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_compute_preemption_options_o(), - cilp_preempt_option); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem); } if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { - nvgpu_log_info(g, "CTA: %x", cta_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_compute_preemption_options_o(), - cta_preempt_option); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem); } if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) { @@ -2226,12 +2169,10 @@ enable_ch: } void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, - struct nvgpu_gr_ctx *gr_ctx) { - u32 v; - - v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( + struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g, mem, gr_ctx->boosted_ctx); - nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v); } int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, @@ -2357,11 +2298,7 @@ int gr_gp10b_init_preemption_state(struct gk20a *g) void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { - u32 va = u64_lo32(gpu_va >> 8); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_full_preemption_ptr_o(), va); - + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va); } void gr_gp10b_init_czf_bypass(struct gk20a *g) @@ -2386,20 +2323,6 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch) return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false); } -void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem) -{ - gk20a_gr_init_ctxsw_hdr_data(g, mem); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_wfi_save_ops_o(), 0); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_cta_save_ops_o(), 0); 
- nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_num_cilp_save_ops_o(), 0); -} - void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index 623bc1df7..fec1c818f 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h @@ -147,7 +147,6 @@ void gr_gp10b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va); int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch); void gr_gp10b_init_czf_bypass(struct gk20a *g); -void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem); void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g); unsigned long gr_gp10b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); bool gr_gp10b_suspend_context(struct channel_gk20a *ch, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 3f0ad1659..f820c307f 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -45,6 +45,8 @@ #include "common/fb/fb_gm20b.h" #include "common/fb/fb_gp10b.h" #include "common/netlist/netlist_gp10b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/ltc/ltc_gm20b.h" @@ -310,8 +312,6 @@ static const struct gpu_ops gp10b_ops = { .program_zcull_mapping = gr_gk20a_program_zcull_mapping, .commit_global_timeslice = gr_gk20a_commit_global_timeslice, .commit_inst = gr_gk20a_commit_inst, - .write_zcull_ptr = gr_gk20a_write_zcull_ptr, - .write_pm_ptr = gr_gk20a_write_pm_ptr, .load_tpc_mask = gr_gm20b_load_tpc_mask, .trigger_suspend = gr_gk20a_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -345,7 +345,6 @@ static const struct gpu_ops gp10b_ops = { 
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3, .set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode, .init_ecc = gp10b_ecc_init, - .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gp10b_init_gfxp_wfi_timeout_count, .get_max_gfxp_wfi_timeout_count = @@ -373,6 +372,81 @@ static const struct gpu_ops gp10b_ops = { gk20a_gr_get_fecs_ctx_state_store_major_rev_id, .alloc_gfxp_rtv_cb = NULL, .commit_gfxp_rtv_cb = NULL, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + 
gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gp10b_ctxsw_prog_set_full_preemption_ptr, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = gm20b_fb_init_hw, @@ -763,6 +837,7 @@ int gp10b_init_hal(struct gk20a *g) gops->ltc = gp10b_ops.ltc; 
gops->ce2 = gp10b_ops.ce2; gops->gr = gp10b_ops.gr; + gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; gops->fb = gp10b_ops.fb; gops->clock_gating = gp10b_ops.clock_gating; gops->fifo = gp10b_ops.fifo; diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index b245c9383..0f93c5701 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -37,7 +37,6 @@ #include #include #include -#include #include @@ -429,11 +428,6 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, } } -u32 gr_gv100_get_hw_accessor_stream_out_mode(void) -{ - return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); -} - void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, u32 num_chiplets, u32 num_perfmons) { diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.h b/drivers/gpu/nvgpu/gv100/gr_gv100.h index acd100e87..18c6d39ab 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.h +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.h @@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map, void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, u32 *t); -u32 gr_gv100_get_hw_accessor_stream_out_mode(void); void gr_gv100_init_hwpm_pmm_register(struct gk20a *g); void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, u32 num_chiplets, u32 num_perfmons); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 52ab46328..6e07aebbb 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -36,6 +36,9 @@ #include "common/fb/fb_gv100.h" #include "common/xve/xve_gp106.h" #include "common/netlist/netlist_gv100.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp106.h" #include 
"common/therm/therm_gp10b.h" @@ -398,8 +401,6 @@ static const struct gpu_ops gv100_ops = { .enable_exceptions = gr_gv11b_enable_exceptions, .get_lrf_tex_ltc_dram_override = get_ecc_override_val, .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, - .get_hw_accessor_stream_out_mode = - gr_gv100_get_hw_accessor_stream_out_mode, .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, .set_pmm_register = gr_gv100_set_pmm_register, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, @@ -416,8 +417,6 @@ static const struct gpu_ops gv100_ops = { .program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_global_timeslice = gr_gv11b_commit_global_timeslice, .commit_inst = gr_gv11b_commit_inst, - .write_zcull_ptr = gr_gv11b_write_zcull_ptr, - .write_pm_ptr = gr_gv11b_write_pm_ptr, .load_tpc_mask = gr_gv11b_load_tpc_mask, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -493,6 +492,93 @@ static const struct gpu_ops gv100_ops = { gk20a_gr_get_fecs_ctx_state_store_major_rev_id, .alloc_gfxp_rtv_cb = NULL, .commit_gfxp_rtv_cb = NULL, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + 
.is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .hw_get_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, + .set_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, + .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + 
.hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gv11b_ctxsw_prog_set_full_preemption_ptr, + .set_full_preemption_ptr_veid0 = + gv11b_ctxsw_prog_set_full_preemption_ptr_veid0, + .hw_get_perf_counter_register_stride = + gv11b_ctxsw_prog_hw_get_perf_counter_register_stride, + .set_context_buffer_ptr = + gv11b_ctxsw_prog_set_context_buffer_ptr, + .set_type_per_veid_header = + gv11b_ctxsw_prog_set_type_per_veid_header, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = gv11b_fb_init_hw, @@ -1001,6 +1087,7 @@ int gv100_init_hal(struct gk20a *g) gops->ltc = gv100_ops.ltc; gops->ce2 = gv100_ops.ce2; gops->gr = gv100_ops.gr; + gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; gops->fb = gv100_ops.fb; gops->clock_gating = gv100_ops.clock_gating; gops->fifo = gv100_ops.fifo; diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index f6cc2117b..84e3f2d99 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include @@ -1663,38 +1662,23 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader) { struct nvgpu_mem *mem = &gr_ctx->mem; - u32 gfxp_preempt_option = - ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); - u32 cilp_preempt_option = - ctxsw_prog_main_image_compute_preemption_options_control_cilp_f(); - u32 cta_preempt_option = - 
ctxsw_prog_main_image_compute_preemption_options_control_cta_f(); int err; nvgpu_log_fn(g, " "); if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { - nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_graphics_preemption_options_o(), - gfxp_preempt_option); + g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem); } if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) { - nvgpu_log_info(g, "CILP: %x", cilp_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_compute_preemption_options_o(), - cilp_preempt_option); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem); } if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { - nvgpu_log_info(g, "CTA: %x", cta_preempt_option); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_compute_preemption_options_o(), - cta_preempt_option); + g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem); } if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) { @@ -2947,35 +2931,6 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) return 0; } -void gr_gv11b_write_zcull_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va) -{ - u32 va_lo, va_hi; - - gpu_va = gpu_va >> 8; - va_lo = u64_lo32(gpu_va); - va_hi = u64_hi32(gpu_va); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_zcull_ptr_o(), va_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi); -} - - -void gr_gv11b_write_pm_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va) -{ - u32 va_lo, va_hi; - - gpu_va = gpu_va >> 8; - va_lo = u64_lo32(gpu_va); - va_hi = u64_hi32(gpu_va); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_pm_ptr_o(), va_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi); -} - void gr_gv11b_load_tpc_mask(struct gk20a *g) { u32 pes_tpc_mask = 0, fuse_tpc_mask; @@ -3009,25 +2964,9 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g) void 
gr_gv11b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va) { - u32 addr_lo, addr_hi; - /* gpu va still needs to be 8 bit aligned */ - gpu_va = gpu_va >> 8; - - addr_lo = u64_lo32(gpu_va); - addr_hi = u64_hi32(gpu_va); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi); - - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo); - nvgpu_mem_wr(g, mem, - ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(), - addr_hi); - + g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va); + g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, mem, gpu_va); } int gr_gv11b_init_fs_state(struct gk20a *g) @@ -3936,7 +3875,7 @@ void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g, *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; *sm_dsm_perf_regs = _sm_dsm_perf_regs; *perf_register_stride = - ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); + g->ops.gr.ctxsw_prog.hw_get_perf_counter_register_stride(); } void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, @@ -3947,7 +3886,7 @@ void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; *ctrl_register_stride = - ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); + g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride(); } void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index df97c9cce..b82ddc90f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -152,10 +152,6 @@ void gr_gv11b_program_sm_id_numbering(struct gk20a *g, int gr_gv11b_load_smid_config(struct gk20a *g); int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va); int 
gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c); -void gr_gv11b_write_zcull_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va); -void gr_gv11b_write_pm_ptr(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va); void gr_gv11b_load_tpc_mask(struct gk20a *g); void gr_gv11b_set_preemption_buffer_va(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 8c80f59f2..c0850acfe 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -35,6 +35,9 @@ #include "common/fb/fb_gp10b.h" #include "common/fb/fb_gv11b.h" #include "common/netlist/netlist_gv11b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gv11b.h" @@ -350,8 +353,6 @@ static const struct gpu_ops gv11b_ops = { .enable_exceptions = gr_gv11b_enable_exceptions, .get_lrf_tex_ltc_dram_override = get_ecc_override_val, .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, - .get_hw_accessor_stream_out_mode = - gr_gv100_get_hw_accessor_stream_out_mode, .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, .set_pmm_register = gr_gv100_set_pmm_register, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, @@ -368,8 +369,6 @@ static const struct gpu_ops gv11b_ops = { .program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_global_timeslice = gr_gv11b_commit_global_timeslice, .commit_inst = gr_gv11b_commit_inst, - .write_zcull_ptr = gr_gv11b_write_zcull_ptr, - .write_pm_ptr = gr_gv11b_write_pm_ptr, .load_tpc_mask = gr_gv11b_load_tpc_mask, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -424,7 +423,6 @@ static const struct gpu_ops gv11b_ops = { .handle_tpc_sm_ecc_exception = 
gr_gv11b_handle_tpc_sm_ecc_exception, .decode_egpc_addr = gv11b_gr_decode_egpc_addr, - .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gv11b_init_gfxp_wfi_timeout_count, .get_max_gfxp_wfi_timeout_count = @@ -455,6 +453,93 @@ static const struct gpu_ops gv11b_ops = { gk20a_gr_get_fecs_ctx_state_store_major_rev_id, .alloc_gfxp_rtv_cb = NULL, .commit_gfxp_rtv_cb = NULL, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .hw_get_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, + .set_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, + .init_ctxsw_hdr_data = 
gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gv11b_ctxsw_prog_set_full_preemption_ptr, + .set_full_preemption_ptr_veid0 = + gv11b_ctxsw_prog_set_full_preemption_ptr_veid0, + 
.hw_get_perf_counter_register_stride = + gv11b_ctxsw_prog_hw_get_perf_counter_register_stride, + .set_context_buffer_ptr = + gv11b_ctxsw_prog_set_context_buffer_ptr, + .set_type_per_veid_header = + gv11b_ctxsw_prog_set_type_per_veid_header, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = gv11b_fb_init_hw, @@ -889,6 +974,7 @@ int gv11b_init_hal(struct gk20a *g) gops->ltc = gv11b_ops.ltc; gops->ce2 = gv11b_ops.ce2; gops->gr = gv11b_ops.gr; + gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; gops->fb = gv11b_ops.fb; gops->clock_gating = gv11b_ops.clock_gating; gops->fifo = gv11b_ops.fifo; diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index 28b6e9c9f..3fcfa645e 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c @@ -31,7 +31,6 @@ #include #include -#include #include #include "gv11b/subctx_gv11b.h" @@ -65,7 +64,8 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c) nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); if (!nvgpu_mem_is_valid(ctxheader)) { - ret = nvgpu_dma_alloc_sys(g, ctxsw_prog_fecs_header_v(), + ret = nvgpu_dma_alloc_sys(g, + g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(), ctxheader); if (ret != 0) { nvgpu_err(g, "failed to allocate sub ctx header"); @@ -100,7 +100,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) struct nvgpu_mem *ctxheader = &c->ctx_header; struct gk20a *g = c->g; int ret = 0; - u32 addr_lo, addr_hi; struct tsg_gk20a *tsg; struct nvgpu_gr_ctx *gr_ctx; @@ -114,38 +113,20 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) g->ops.mm.l2_flush(g, true); /* set priv access map */ - addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); - addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_priv_access_map_addr_lo_o(), - addr_lo); - nvgpu_mem_wr(g, ctxheader, - 
ctxsw_prog_main_image_priv_access_map_addr_hi_o(), - addr_hi); + g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader, + gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); - addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); - addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_lo_o(), - addr_lo); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_patch_adr_hi_o(), - addr_hi); + g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader, + gr_ctx->patch_ctx.mem.gpu_va); - g->ops.gr.write_pm_ptr(g, ctxheader, gr_ctx->pm_ctx.mem.gpu_va); - g->ops.gr.write_zcull_ptr(g, ctxheader, gr_ctx->zcull_ctx.gpu_va); + g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader, + gr_ctx->pm_ctx.mem.gpu_va); + g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader, + gr_ctx->zcull_ctx.gpu_va); - addr_lo = u64_lo32(gpu_va); - addr_hi = u64_hi32(gpu_va); + g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo); - - nvgpu_mem_wr(g, ctxheader, - ctxsw_prog_main_image_ctl_o(), - ctxsw_prog_main_image_ctl_type_per_veid_header_v()); + g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader); return ret; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/fecs_trace.h index 5dc35302d..440efab3f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fecs_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fecs_trace.h @@ -45,13 +45,11 @@ struct gk20a_fecs_trace_record { }; #ifdef CONFIG_GK20A_CTXSW_TRACE -u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void); -u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts); -u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts); -int gk20a_fecs_trace_num_ts(void); +int gk20a_fecs_trace_num_ts(struct gk20a *g); struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(struct gk20a *g, int idx); -bool 
gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r); +bool gk20a_fecs_trace_is_valid_record(struct gk20a *g, + struct gk20a_fecs_trace_record *r); int gk20a_fecs_trace_get_read_index(struct gk20a *g); int gk20a_fecs_trace_get_write_index(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index b611f2599..4b702885a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -342,7 +342,6 @@ struct gpu_ops { int (*update_smpc_ctxsw_mode)(struct gk20a *g, struct channel_gk20a *c, bool enable); - u32 (*get_hw_accessor_stream_out_mode)(void); int (*update_hwpm_ctxsw_mode)(struct gk20a *g, struct channel_gk20a *c, u64 gpu_va, @@ -454,10 +453,6 @@ struct gpu_ops { int (*commit_global_timeslice)(struct gk20a *g, struct channel_gk20a *c); int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va); - void (*write_zcull_ptr)(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va); - void (*write_pm_ptr)(struct gk20a *g, - struct nvgpu_mem *mem, u64 gpu_va); void (*set_preemption_buffer_va)(struct gk20a *g, struct nvgpu_mem *mem, u64 gpu_va); void (*load_tpc_mask)(struct gk20a *g); @@ -479,8 +474,6 @@ struct gpu_ops { u32 gpc, u32 tpc, u32 sm); void (*resume_all_sms)(struct gk20a *g); void (*disable_rd_coalesce)(struct gk20a *g); - void (*init_ctxsw_hdr_data)(struct gk20a *g, - struct nvgpu_mem *mem); void (*init_gfxp_wfi_timeout_count)(struct gk20a *g); unsigned long (*get_max_gfxp_wfi_timeout_count) (struct gk20a *g); @@ -539,6 +532,96 @@ struct gpu_ops { struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm); void (*commit_gfxp_rtv_cb)(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool patch); + struct { + u32 (*hw_get_fecs_header_size)(void); + u32 (*hw_get_gpccs_header_size)(void); + u32 (*hw_get_extended_buffer_segments_size_in_bytes)(void); + u32 (*hw_extended_marker_size_in_bytes)(void); + u32 (*hw_get_perf_counter_control_register_stride)(void); + u32 
(*hw_get_perf_counter_register_stride)(void); + u32 (*get_main_image_ctx_id)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + u32 (*get_patch_count)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_patch_count)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 count); + void (*set_patch_addr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_zcull_ptr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_zcull)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 mode); + void (*set_zcull_mode_no_ctxsw)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + bool (*is_zcull_mode_separate_buffer)(u32 mode); + void (*set_pm_ptr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_pm_mode)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 mode); + void (*set_pm_smpc_mode)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable); + u32 (*set_pm_mode_no_ctxsw)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + u32 (*set_pm_mode_ctxsw)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + u32 (*set_pm_mode_stream_out_ctxsw)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + u32 (*hw_get_pm_mode_no_ctxsw)(void); + u32 (*hw_get_pm_mode_ctxsw)(void); + u32 (*hw_get_pm_mode_stream_out_ctxsw)(void); + void (*init_ctxsw_hdr_data)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_compute_preemption_mode_cta)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_compute_preemption_mode_cilp)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_graphics_preemption_mode_gfxp)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_cde_enabled)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_pc_sampling)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable); + void (*set_priv_access_map_config_mode)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool allow_all); + void (*set_priv_access_map_addr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*disable_verif_features)(struct 
gk20a *g, + struct nvgpu_mem *ctx_mem); + bool (*check_main_image_header_magic)(u8 *context); + bool (*check_local_header_magic)(u8 *context); + u32 (*get_num_gpcs)(u8 *context); + u32 (*get_num_tpcs)(u8 *context); + void (*get_extended_buffer_size_offset)(u8 *context, + u32 *size, u32 *offset); + void (*get_ppc_info)(u8 *context, + u32 *num_ppcs, u32 *ppc_mask); + u32 (*get_local_priv_register_ctl_offset)(u8 *context); + u32 (*hw_get_ts_tag_invalid_timestamp)(void); + u32 (*hw_get_ts_tag)(u64 ts); + u64 (*hw_record_ts_timestamp)(u64 ts); + u32 (*hw_get_ts_record_size_in_bytes)(void); + u32 (*is_ts_valid_record)(u32 magic_hi); + u32 (*get_ts_buffer_aperture_mask)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*set_ts_num_records)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 num); + void (*set_ts_buffer_ptr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr, + u32 aperture_mask); + void (*set_pmu_options_boost_clock_frequencies)( + struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 boosted_ctx); + void (*set_context_buffer_ptr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_full_preemption_ptr)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_full_preemption_ptr_veid0)(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u64 addr); + void (*set_type_per_veid_header)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + void (*dump_ctxsw_stats)(struct gk20a *g, + struct nvgpu_mem *ctx_mem); + } ctxsw_prog; } gr; struct { void (*init_hw)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h index c41905c65..c70b10993 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + 
return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -118,6 +122,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_ctxsw_prog_gp106.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_ctxsw_prog_gp106.h index 94ecb0ff0..18d147927 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_ctxsw_prog_gp106.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp106/hw_ctxsw_prog_gp106.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -102,6 +106,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h index 003db2cf7..55c8f9590 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -106,6 +110,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 
ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h index cc00de087..7f9f827c0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h index 9f2ae2db5..11776204d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ctxsw_prog_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ctxsw_prog_tu104.h index 71ec574ca..983ab43b5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ctxsw_prog_tu104.h +++ 
b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ctxsw_prog_tu104.h @@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void) { return 0x00000100U; } +static inline u32 ctxsw_prog_gpccs_header_stride_v(void) +{ + return 0x00000100U; +} static inline u32 ctxsw_prog_main_image_num_gpcs_o(void) { return 0x00000008U; @@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void) { return U32(0x7U) << 0U; } +static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void) +{ + return 0x1U; +} static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void) { return 0x0U; diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c index ee970998a..958ee2b6b 100644 --- a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c @@ -36,7 +36,6 @@ #include "os_linux.h" #include "ctxsw_trace.h" -#include #include #define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE) diff --git a/drivers/gpu/nvgpu/os/linux/debug_fecs_trace.c b/drivers/gpu/nvgpu/os/linux/debug_fecs_trace.c index 18b4192f4..fec309b96 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_fecs_trace.c +++ b/drivers/gpu/nvgpu/os/linux/debug_fecs_trace.c @@ -55,23 +55,24 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show( struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(g, *pos); int i; - const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v(); + const u32 invalid_tag = + g->ops.gr.ctxsw_prog.hw_get_ts_tag_invalid_timestamp(); u32 tag; u64 timestamp; seq_printf(s, "record #%lld (%p)\n", *pos, r); seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo); seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi); - if (gk20a_fecs_trace_is_valid_record(r)) { + if (gk20a_fecs_trace_is_valid_record(g, r)) { seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr); seq_printf(s, "\tcontext_id=%08x\n", r->context_id); seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr); seq_printf(s, "\tnew_context_id=%08x\n", 
r->new_context_id); - for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) { - tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]); + for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) { + tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]); if (tag == invalid_tag) continue; - timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]); + timestamp = g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]); timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp); } diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c index 1f19313c6..541744270 100644 --- a/drivers/gpu/nvgpu/os/linux/sched.c +++ b/drivers/gpu/nvgpu/os/linux/sched.c @@ -31,7 +31,6 @@ #include "os_linux.h" #include "ioctl_tsg.h" -#include #include ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf, diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 37ff73354..7b8eb6a62 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -38,6 +38,9 @@ #include "common/fb/fb_tu104.h" #include "common/xve/xve_gp106.h" #include "common/netlist/netlist_tu104.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gp106.h" @@ -412,8 +415,6 @@ static const struct gpu_ops tu104_ops = { .enable_exceptions = gr_gv11b_enable_exceptions, .get_lrf_tex_ltc_dram_override = get_ecc_override_val, .update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode, - .get_hw_accessor_stream_out_mode = - gr_gv100_get_hw_accessor_stream_out_mode, .get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon, .set_pmm_register = gr_gv100_set_pmm_register, .update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode, @@ -430,8 +431,6 @@ static const struct gpu_ops tu104_ops = { 
.program_zcull_mapping = gr_gv11b_program_zcull_mapping, .commit_global_timeslice = gr_gv11b_commit_global_timeslice, .commit_inst = gr_gv11b_commit_inst, - .write_zcull_ptr = gr_gv11b_write_zcull_ptr, - .write_pm_ptr = gr_gv11b_write_pm_ptr, .load_tpc_mask = gr_gv11b_load_tpc_mask, .trigger_suspend = gv11b_gr_sm_trigger_suspend, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -488,7 +487,6 @@ static const struct gpu_ops tu104_ops = { .handle_tpc_sm_ecc_exception = gr_gv11b_handle_tpc_sm_ecc_exception, .decode_egpc_addr = gv11b_gr_decode_egpc_addr, - .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gv11b_init_gfxp_wfi_timeout_count, .get_max_gfxp_wfi_timeout_count = @@ -517,6 +515,93 @@ static const struct gpu_ops tu104_ops = { .dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats, .get_fecs_ctx_state_store_major_rev_id = gk20a_gr_get_fecs_ctx_state_store_major_rev_id, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode 
= gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .hw_get_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, + .set_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, + .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gm20b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = 
gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gv11b_ctxsw_prog_set_full_preemption_ptr, + .set_full_preemption_ptr_veid0 = + gv11b_ctxsw_prog_set_full_preemption_ptr_veid0, + .hw_get_perf_counter_register_stride = + gv11b_ctxsw_prog_hw_get_perf_counter_register_stride, + .set_context_buffer_ptr = + gv11b_ctxsw_prog_set_context_buffer_ptr, + .set_type_per_veid_header = + gv11b_ctxsw_prog_set_type_per_veid_header, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = gv11b_fb_init_hw, @@ -1027,6 +1112,7 @@ int tu104_init_hal(struct gk20a *g) gops->ltc = tu104_ops.ltc; gops->ce2 = tu104_ops.ce2; gops->gr = tu104_ops.gr; + gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; gops->fb = tu104_ops.fb; gops->clock_gating = tu104_ops.clock_gating; gops->fifo = tu104_ops.fifo; diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index b3ba813e8..efeb461fb 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -28,6 +28,8 @@ #include "common/fb/fb_gm20b.h" #include "common/fb/fb_gp10b.h" #include "common/netlist/netlist_gp10b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/ltc/ltc_gm20b.h" @@ -176,8 +178,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .program_zcull_mapping = NULL, .commit_global_timeslice = NULL, .commit_inst = vgpu_gr_commit_inst, - .write_zcull_ptr = gr_gk20a_write_zcull_ptr, - .write_pm_ptr = gr_gk20a_write_pm_ptr, .load_tpc_mask = NULL, 
.trigger_suspend = NULL, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -212,7 +212,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .set_bes_crop_debug4 = NULL, .set_ctxsw_preemption_mode = vgpu_gr_gp10b_set_ctxsw_preemption_mode, - .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gp10b_init_gfxp_wfi_timeout_count, .get_max_gfxp_wfi_timeout_count = @@ -230,6 +229,81 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .init_ctxsw_hdr_data = 
gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + .get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gp10b_ctxsw_prog_set_full_preemption_ptr, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = NULL, @@ -575,6 +649,7 @@ int 
vgpu_gp10b_init_hal(struct gk20a *g) gops->ltc = vgpu_gp10b_ops.ltc; gops->ce2 = vgpu_gp10b_ops.ce2; gops->gr = vgpu_gp10b_ops.gr; + gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; gops->fb = vgpu_gp10b_ops.fb; gops->clock_gating = vgpu_gp10b_ops.clock_gating; gops->fifo = vgpu_gp10b_ops.fifo; diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index d1dcd988a..e016df897 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -39,7 +39,6 @@ #include "gk20a/fecs_trace_gk20a.h" #include -#include void vgpu_gr_detect_sm_arch(struct gk20a *g) { @@ -614,7 +613,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) } /* PM ctxt switch is off by default */ - gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + gr_ctx->pm_ctx.pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); nvgpu_log_fn(g, "done"); return 0; @@ -1087,18 +1086,21 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, * will return an error due to using the same GPU VA twice. 
*/ - if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { return 0; } p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { - if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { return 0; } p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; } else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && - (g->ops.gr.get_hw_accessor_stream_out_mode)){ - if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) { + (g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw != NULL)) { + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { return 0; } p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW; @@ -1130,11 +1132,14 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, err = err ? err : msg.ret; if (!err) { if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) { - pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { - pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); } else { - pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode(); + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); } } diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 325ed7a2c..64b6df67f 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -29,6 +29,9 @@ #include "common/fb/fb_gp10b.h" #include "common/fb/fb_gv11b.h" #include "common/netlist/netlist_gv11b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h" +#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h" +#include 
"common/gr/ctxsw_prog/ctxsw_prog_gv11b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gv11b.h" @@ -177,8 +180,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .enable_exceptions = NULL, .get_lrf_tex_ltc_dram_override = NULL, .update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode, - .get_hw_accessor_stream_out_mode = - gr_gv100_get_hw_accessor_stream_out_mode, .update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode, .record_sm_error_state = gv11b_gr_record_sm_error_state, .clear_sm_error_state = vgpu_gr_clear_sm_error_state, @@ -192,8 +193,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .program_zcull_mapping = NULL, .commit_global_timeslice = NULL, .commit_inst = vgpu_gr_gv11b_commit_inst, - .write_zcull_ptr = gr_gv11b_write_zcull_ptr, - .write_pm_ptr = gr_gv11b_write_pm_ptr, .load_tpc_mask = NULL, .trigger_suspend = NULL, .wait_for_pause = gr_gk20a_wait_for_pause, @@ -247,7 +246,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .handle_tpc_sm_ecc_exception = gr_gv11b_handle_tpc_sm_ecc_exception, .decode_egpc_addr = gv11b_gr_decode_egpc_addr, - .init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data, .init_gfxp_wfi_timeout_count = gr_gv11b_init_gfxp_wfi_timeout_count, .get_max_gfxp_wfi_timeout_count = @@ -265,6 +263,93 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, + .ctxsw_prog = { + .hw_get_fecs_header_size = + gm20b_ctxsw_prog_hw_get_fecs_header_size, + .hw_get_gpccs_header_size = + gm20b_ctxsw_prog_hw_get_gpccs_header_size, + .hw_get_extended_buffer_segments_size_in_bytes = + gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes, + .hw_extended_marker_size_in_bytes = + gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes, + .hw_get_perf_counter_control_register_stride = + gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride, + .get_main_image_ctx_id = + 
gm20b_ctxsw_prog_get_main_image_ctx_id, + .get_patch_count = gm20b_ctxsw_prog_get_patch_count, + .set_patch_count = gm20b_ctxsw_prog_set_patch_count, + .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr, + .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr, + .set_zcull = gm20b_ctxsw_prog_set_zcull, + .set_zcull_mode_no_ctxsw = + gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw, + .is_zcull_mode_separate_buffer = + gm20b_ctxsw_prog_is_zcull_mode_separate_buffer, + .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, + .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, + .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, + .set_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, + .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, + .hw_get_pm_mode_no_ctxsw = + gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, + .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, + .hw_get_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, + .set_pm_mode_stream_out_ctxsw = + gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, + .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, + .set_compute_preemption_mode_cta = + gp10b_ctxsw_prog_set_compute_preemption_mode_cta, + .set_compute_preemption_mode_cilp = + gp10b_ctxsw_prog_set_compute_preemption_mode_cilp, + .set_graphics_preemption_mode_gfxp = + gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp, + .set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled, + .set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling, + .set_priv_access_map_config_mode = + gm20b_ctxsw_prog_set_priv_access_map_config_mode, + .set_priv_access_map_addr = + gm20b_ctxsw_prog_set_priv_access_map_addr, + .disable_verif_features = + gm20b_ctxsw_prog_disable_verif_features, + .check_main_image_header_magic = + gm20b_ctxsw_prog_check_main_image_header_magic, + .check_local_header_magic = + gm20b_ctxsw_prog_check_local_header_magic, + .get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs, + .get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs, + 
.get_extended_buffer_size_offset = + gm20b_ctxsw_prog_get_extended_buffer_size_offset, + .get_ppc_info = gm20b_ctxsw_prog_get_ppc_info, + .get_local_priv_register_ctl_offset = + gm20b_ctxsw_prog_get_local_priv_register_ctl_offset, + .hw_get_ts_tag_invalid_timestamp = + gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp, + .hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag, + .hw_record_ts_timestamp = + gm20b_ctxsw_prog_hw_record_ts_timestamp, + .hw_get_ts_record_size_in_bytes = + gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes, + .is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record, + .get_ts_buffer_aperture_mask = + gm20b_ctxsw_prog_get_ts_buffer_aperture_mask, + .set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records, + .set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr, + .set_pmu_options_boost_clock_frequencies = + gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies, + .set_full_preemption_ptr = + gv11b_ctxsw_prog_set_full_preemption_ptr, + .set_full_preemption_ptr_veid0 = + gv11b_ctxsw_prog_set_full_preemption_ptr_veid0, + .hw_get_perf_counter_register_stride = + gv11b_ctxsw_prog_hw_get_perf_counter_register_stride, + .set_context_buffer_ptr = + gv11b_ctxsw_prog_set_context_buffer_ptr, + .set_type_per_veid_header = + gv11b_ctxsw_prog_set_type_per_veid_header, + .dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats, + } }, .fb = { .init_hw = NULL, @@ -640,6 +725,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) gops->ltc = vgpu_gv11b_ops.ltc; gops->ce2 = vgpu_gv11b_ops.ce2; gops->gr = vgpu_gv11b_ops.gr; + gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; gops->fb = vgpu_gv11b_ops.fb; gops->clock_gating = vgpu_gv11b_ops.clock_gating; gops->fifo = vgpu_gv11b_ops.fifo; diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_subctx_gv11b.c index f22513af9..e49d99ba1 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_subctx_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_subctx_gv11b.c @@ -27,7 +27,6 @@ #include 
#include -#include int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) { @@ -41,8 +40,8 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) msg.handle = vgpu_get_handle(c->g); p->ch_handle = c->virt_ctx; p->ctx_header_va = __nvgpu_vm_alloc_va(c->vm, - ctxsw_prog_fecs_header_v(), - GMMU_PAGE_SIZE_KERNEL); + c->g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(), + GMMU_PAGE_SIZE_KERNEL); if (!p->ctx_header_va) { nvgpu_err(c->g, "alloc va failed for ctx_header"); return -ENOMEM;