mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add separate unit for gr/ctxsw_prog
Add separate new unit gr/ctxsw_prog that provides interface to access h/w header files hw_ctxsw_prog_*.h Add below chip specific files that access above h/w unit and provide interface through g->ops.gr.ctxsw_prog.*() HAL for rest of the units common/gr/ctxsw_prog/ctxsw_prog_gm20b.c common/gr/ctxsw_prog/ctxsw_prog_gp10b.c common/gr/ctxsw_prog/ctxsw_prog_gv11b.c Remove all the h/w header includes from rest of the units and code. Remove direct calls to h/w headers ctxsw_prog_*() and use HALs g->ops.gr.ctxsw_prog.*() instead In gr_gk20a_find_priv_offset_in_ext_buffer(), h/w header ctxsw_prog_extended_num_smpc_quadrants_v() is only defined on gk20a And since we don't support gk20a remove corresponding code Add missing h/w header ctxsw_prog_main_image_pm_mode_ctxsw_f() for some chips Add new h/w header ctxsw_prog_gpccs_header_stride_v() Jira NVGPU-1526 Change-Id: I170f5c0da26ada833f94f5479ff299c0db56a732 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1966111 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
8ef20036c7
commit
6777bd5ed2
@@ -68,6 +68,9 @@ nvgpu-y += common/bus/bus_gk20a.o \
|
||||
common/ltc/ltc_gp10b.o \
|
||||
common/ltc/ltc_gv11b.o \
|
||||
common/ltc/ltc_tu104.o \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gp10b.o \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gv11b.o \
|
||||
common/netlist/netlist.o \
|
||||
common/netlist/netlist_sim.o \
|
||||
common/netlist/netlist_gm20b.o \
|
||||
|
||||
@@ -106,6 +106,9 @@ srcs := os/posix/nvgpu.c \
|
||||
common/falcon/falcon_gp106.c \
|
||||
common/falcon/falcon_gv100.c \
|
||||
common/falcon/falcon_tu104.c \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gm20b.c \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gp10b.c \
|
||||
common/gr/ctxsw_prog/ctxsw_prog_gv11b.c \
|
||||
common/netlist/netlist.c \
|
||||
common/netlist/netlist_sim.c \
|
||||
common/netlist/netlist_gm20b.c \
|
||||
|
||||
345
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c
Normal file
345
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c
Normal file
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
|
||||
#include "ctxsw_prog_gm20b.h"
|
||||
|
||||
#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
|
||||
|
||||
/* Return the size in bytes of the FECS context header, per the h/w manual. */
u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void)
{
	u32 size = ctxsw_prog_fecs_header_v();

	return size;
}
|
||||
|
||||
/* Return the per-GPC stride (size) of a GPCCS context header in bytes. */
u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void)
{
	u32 stride = ctxsw_prog_gpccs_header_stride_v();

	return stride;
}
|
||||
|
||||
/* Return the segment size (bytes) used by the extended context buffer. */
u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void)
{
	u32 seg_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();

	return seg_size;
}
|
||||
|
||||
/* Return the size (bytes) of a marker in the extended context buffer. */
u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void)
{
	u32 marker_size = ctxsw_prog_extended_marker_size_in_bytes_v();

	return marker_size;
}
|
||||
|
||||
/* Return the register stride for SM DSM perf counter control registers. */
u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void)
{
	u32 stride =
		ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();

	return stride;
}
|
||||
|
||||
/* Read the context id field from the main context image. */
u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 offset = ctxsw_prog_main_image_context_id_o();

	return nvgpu_mem_rd(g, ctx_mem, offset);
}
|
||||
|
||||
/* Read the patch-buffer entry count from the main context image. */
u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem)
{
	u32 offset = ctxsw_prog_main_image_patch_count_o();

	return nvgpu_mem_rd(g, ctx_mem, offset);
}
|
||||
|
||||
/* Write the patch-buffer entry count into the main context image. */
void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 count)
{
	u32 offset = ctxsw_prog_main_image_patch_count_o();

	nvgpu_mem_wr(g, ctx_mem, offset, count);
}
|
||||
|
||||
/* Program the 64-bit patch buffer address (split lo/hi) in the context image. */
void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u32 addr_lo = u64_lo32(addr);
	u32 addr_hi = u64_hi32(addr);

	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_adr_lo_o(),
		addr_lo);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_adr_hi_o(),
		addr_hi);
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr)
|
||||
{
|
||||
addr = addr >> 8;
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(),
|
||||
u64_lo32(addr));
|
||||
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u32 mode)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_o(), mode);
|
||||
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_o(),
|
||||
ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
|
||||
}
|
||||
|
||||
/* Return true when @mode is the "separate buffer" zcull mode value. */
bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode)
{
	u32 separate_buffer =
		ctxsw_prog_main_image_zcull_mode_separate_buffer_v();

	return mode == separate_buffer;
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr)
|
||||
{
|
||||
addr = addr >> 8;
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
|
||||
u64_lo32(addr));
|
||||
}
|
||||
|
||||
/*
 * Read-modify-write the PM word in the context image: clear the mode field
 * and OR in the caller-supplied @mode field value.
 */
void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 mode)
{
	u32 offset = ctxsw_prog_main_image_pm_o();
	u32 pm_word = nvgpu_mem_rd(g, ctx_mem, offset);

	pm_word &= ~ctxsw_prog_main_image_pm_mode_m();
	pm_word |= mode;

	nvgpu_mem_wr(g, ctx_mem, offset, pm_word);
}
|
||||
|
||||
/*
 * Enable or disable SMPC mode ctxsw via a read-modify-write of the PM word's
 * smpc_mode field.
 */
void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable)
{
	u32 offset = ctxsw_prog_main_image_pm_o();
	u32 smpc_mode = enable ?
		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
	u32 pm_word = nvgpu_mem_rd(g, ctx_mem, offset);

	pm_word &= ~ctxsw_prog_main_image_pm_smpc_mode_m();
	pm_word |= smpc_mode;

	nvgpu_mem_wr(g, ctx_mem, offset, pm_word);
}
|
||||
|
||||
/* Set PM mode to "no ctxsw"; return the field value that was programmed. */
u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, mode);
	return mode;
}
|
||||
|
||||
/* Set PM mode to "ctxsw"; return the field value that was programmed. */
u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, mode);
	return mode;
}
|
||||
|
||||
|
||||
/* Return the h/w field value for PM mode "no ctxsw". */
u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();

	return mode;
}
|
||||
|
||||
/* Return the h/w field value for PM mode "ctxsw". */
u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();

	return mode;
}
|
||||
|
||||
/* Zero the save/restore operation counters in the main context header. */
void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_restore_ops_o(), 0);
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_preemption_options_o(),
|
||||
ctxsw_prog_main_image_preemption_options_control_cta_enabled_f());
|
||||
}
|
||||
|
||||
void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o());
|
||||
|
||||
data |= ctxsw_prog_main_image_ctl_cde_enabled_f();
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data);
|
||||
}
|
||||
|
||||
/* Enable/disable PC sampling via the PM word's pc_sampling field (RMW). */
void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable)
{
	u32 offset = ctxsw_prog_main_image_pm_o();
	u32 pm_word = nvgpu_mem_rd(g, ctx_mem, offset);

	pm_word &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
	pm_word |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);

	nvgpu_mem_wr(g, ctx_mem, offset, pm_word);
}
|
||||
|
||||
/*
 * Configure the priv access map: "allow all" bypasses the map entirely,
 * otherwise register accesses are checked against the map ("use map").
 */
void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool allow_all)
{
	u32 config_mode = allow_all ?
		ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f() :
		ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_config_o(), config_mode);
}
|
||||
|
||||
/* Program the 64-bit priv access map address (split lo/hi). */
void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u32 addr_lo = u64_lo32(addr);
	u32 addr_hi = u64_hi32(addr);

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_addr_lo_o(), addr_lo);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_addr_hi_o(), addr_hi);
}
|
||||
|
||||
void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_misc_options_o());
|
||||
|
||||
data = data & ~ctxsw_prog_main_image_misc_options_verif_features_m();
|
||||
data = data | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
|
||||
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_misc_options_o(), data);
|
||||
}
|
||||
|
||||
/* Validate the magic word of a CPU-mapped main context header. */
bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context)
{
	u32 *magic_ptr =
		(u32 *)(context + ctxsw_prog_main_image_magic_value_o());

	return *magic_ptr == ctxsw_prog_main_image_magic_value_v_value_v();
}
|
||||
|
||||
/* Validate the magic word of a CPU-mapped local (GPCCS) context header. */
bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context)
{
	u32 *magic_ptr = (u32 *)(context + ctxsw_prog_local_magic_value_o());

	return *magic_ptr == ctxsw_prog_local_magic_value_v_value_v();
}
|
||||
|
||||
/* Read the GPC count from a CPU-mapped main context header. */
u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context)
{
	u32 *num_gpcs_ptr =
		(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());

	return *num_gpcs_ptr;
}
|
||||
|
||||
/* Read the TPC count from a CPU-mapped local (GPCCS) context header. */
u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context)
{
	u32 *num_tpcs_ptr =
		(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());

	return *num_tpcs_ptr;
}
|
||||
|
||||
/*
 * Decode the extended buffer control word from a CPU-mapped header into
 * its size and offset fields.
 */
void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context,
	u32 *size, u32 *offset)
{
	u32 ctl = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());

	*size = ctxsw_prog_main_extended_buffer_ctl_size_v(ctl);
	*offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(ctl);
}
|
||||
|
||||
/* Decode PPC count and PPC mask from a CPU-mapped local header. */
void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask)
{
	u32 ppc_info = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());

	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(ppc_info);
	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(ppc_info);
}
|
||||
|
||||
/* Extract the priv register ctl offset field from a CPU-mapped local header. */
u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context)
{
	u32 ctl = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());

	return ctxsw_prog_local_priv_register_ctl_offset_v(ctl);
}
|
||||
|
||||
/* Return the timestamp-record tag value meaning "invalid timestamp". */
u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void)
{
	u32 tag =
		ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();

	return tag;
}
|
||||
|
||||
/* Extract the tag field from the high 32 bits of a timestamp record. */
u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts)
{
	u32 ts_hi = (u32)(ts >> 32);

	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v(ts_hi);
}
|
||||
|
||||
/* Strip the tag bits (in the high word) from a raw 64-bit timestamp record. */
u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts)
{
	u64 tag_mask =
		((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32;

	return ts & ~tag_mask;
}
|
||||
|
||||
/* Return the size in bytes of one timestamp record. */
u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void)
{
	u32 record_size = ctxsw_prog_record_timestamp_record_size_in_bytes_v();

	return record_size;
}
|
||||
|
||||
/*
 * Check a timestamp record's high magic word. Returns a u32-valued boolean
 * (1 valid / 0 invalid) to match the HAL signature.
 */
u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi)
{
	u32 expected = ctxsw_prog_record_timestamp_magic_value_hi_v_value_v();

	return magic_hi == expected;
}
|
||||
|
||||
/*
 * Return the aperture field for the timestamp buffer ptr_hi word, chosen by
 * nvgpu_aperture_mask() according to where @ctx_mem actually resides.
 */
u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 sysmem_noncoh =
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f();
	u32 sysmem_coh =
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f();
	u32 vidmem =
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f();

	return nvgpu_aperture_mask(g, ctx_mem, sysmem_noncoh, sysmem_coh,
		vidmem);
}
|
||||
|
||||
/* Program the number of records in the context timestamp buffer. */
void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 num)
{
	u32 control =
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(num);

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		control);
}
|
||||
|
||||
/*
 * Program the 64-bit timestamp buffer pointer; the hi word also carries the
 * aperture mask selecting the buffer's memory target.
 */
void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask)
{
	u32 ptr_lo = u64_lo32(addr);
	u32 ptr_hi =
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u64_hi32(addr)) |
		aperture_mask;

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		ptr_lo);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ptr_hi);
}
|
||||
95
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h
Normal file
95
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_CTXSW_PROG_GM20B_H
|
||||
#define NVGPU_CTXSW_PROG_GM20B_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_mem;
|
||||
|
||||
u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void);
|
||||
u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void);
|
||||
u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void);
|
||||
u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void);
|
||||
u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void);
|
||||
u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem);
|
||||
u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem);
|
||||
void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 count);
|
||||
void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr);
|
||||
void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u32 mode);
|
||||
void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode);
|
||||
void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr);
|
||||
void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 mode);
|
||||
void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool enable);
|
||||
u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void);
|
||||
u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void);
|
||||
void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool enable);
|
||||
void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool allow_all);
|
||||
void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context);
|
||||
bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context);
|
||||
u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context);
|
||||
u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context);
|
||||
void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context,
|
||||
u32 *size, u32 *offset);
|
||||
void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask);
|
||||
u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context);
|
||||
u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void);
|
||||
u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts);
|
||||
u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts);
|
||||
u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void);
|
||||
u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi);
|
||||
u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 num);
|
||||
void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask);
|
||||
|
||||
#endif /* NVGPU_CTXSW_PROG_GM20B_H */
|
||||
129
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.c
Normal file
129
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
|
||||
#include "ctxsw_prog_gm20b.h"
|
||||
#include "ctxsw_prog_gp10b.h"
|
||||
|
||||
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
|
||||
|
||||
void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_graphics_preemption_options_o(),
|
||||
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f());
|
||||
}
|
||||
|
||||
void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cta_f());
|
||||
}
|
||||
|
||||
void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cilp_f());
|
||||
}
|
||||
|
||||
/* Write the boosted-clock-frequencies request into the PMU options word. */
void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 boosted_ctx)
{
	u32 options =
		ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(boosted_ctx);

	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pmu_options_o(),
		options);
}
|
||||
|
||||
/*
 * Program the full-preemption buffer pointer. The pointer is stored
 * right-shifted by 8 bits; only the low 32 bits are written on gp10b.
 */
void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u64 shifted_addr = addr >> 8;

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_o(),
		u64_lo32(shifted_addr));
}
|
||||
|
||||
/*
 * Zero the gp10b-specific preemption save-op counters, then fall through to
 * the gm20b init for the common save/restore counters.
 */
void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);

	gm20b_ctxsw_prog_init_ctxsw_hdr_data(g, ctx_mem);
}
|
||||
|
||||
void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_magic_value_o()),
|
||||
ctxsw_prog_main_image_magic_value_v_value_v());
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
|
||||
|
||||
nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_num_save_ops_o()));
|
||||
nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_num_wfi_save_ops_o()));
|
||||
nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_num_cta_save_ops_o()));
|
||||
nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_num_gfxp_save_ops_o()));
|
||||
nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_num_cilp_save_ops_o()));
|
||||
nvgpu_err(g,
|
||||
"image gfx preemption option (GFXP is 1) %x",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_graphics_preemption_options_o()));
|
||||
nvgpu_err(g,
|
||||
"image compute preemption option (CTA is 1) %x",
|
||||
nvgpu_mem_rd(g, ctx_mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o()));
|
||||
}
|
||||
46
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.h
Normal file
46
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gp10b.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_CTXSW_PROG_GP10B_H
|
||||
#define NVGPU_CTXSW_PROG_GP10B_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_mem;
|
||||
|
||||
void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 boosted_ctx);
|
||||
void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
|
||||
#endif /* NVGPU_CTXSW_PROG_GP10B_H */
|
||||
111
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c
Normal file
111
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
|
||||
#include "ctxsw_prog_gm20b.h"
|
||||
#include "ctxsw_prog_gv11b.h"
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
|
||||
|
||||
void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr)
|
||||
{
|
||||
addr = addr >> 8;
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(),
|
||||
u64_lo32(addr));
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_hi_o(),
|
||||
u64_hi32(addr));
|
||||
}
|
||||
|
||||
void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr)
|
||||
{
|
||||
addr = addr >> 8;
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
|
||||
u64_lo32(addr));
|
||||
nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_hi_o(),
|
||||
u64_hi32(addr));
|
||||
}
|
||||
|
||||
/* Return the h/w field value for PM mode "stream out ctxsw". */
u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();

	return mode;
}
|
||||
|
||||
/*
 * Set PM mode to "stream out ctxsw" (via the common gm20b RMW helper);
 * return the field value that was programmed.
 */
u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 mode = ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, mode);
	return mode;
}
|
||||
|
||||
/*
 * Program the full-preemption buffer pointer. The pointer is stored
 * right-shifted by 8 bits; gv11b writes both lo and hi words.
 */
void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u64 shifted_addr = addr >> 8;
	u32 ptr_lo = u64_lo32(shifted_addr);
	u32 ptr_hi = u64_hi32(shifted_addr);

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_o(), ptr_lo);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_hi_o(), ptr_hi);
}
|
||||
|
||||
/*
 * Program the VEID0 full-preemption buffer pointer. The pointer is stored
 * right-shifted by 8 bits; gv11b writes both lo and hi words.
 */
void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u64 shifted_addr = addr >> 8;
	u32 ptr_lo = u64_lo32(shifted_addr);
	u32 ptr_hi = u64_hi32(shifted_addr);

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), ptr_lo);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
		ptr_hi);
}
|
||||
|
||||
u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void)
|
||||
{
|
||||
return ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
|
||||
}
|
||||
|
||||
void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_context_buffer_ptr_hi_o(),
|
||||
u64_hi32(addr));
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_context_buffer_ptr_o(),
|
||||
u64_lo32(addr));
|
||||
}
|
||||
|
||||
void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, ctx_mem,
|
||||
ctxsw_prog_main_image_ctl_o(),
|
||||
ctxsw_prog_main_image_ctl_type_per_veid_header_v());
|
||||
}
|
||||
45
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h
Normal file
45
drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_CTXSW_PROG_GV11B_H
|
||||
#define NVGPU_CTXSW_PROG_GV11B_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr);
|
||||
void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
|
||||
u64 addr);
|
||||
u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void);
|
||||
u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void);
|
||||
void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
|
||||
#endif /* NVGPU_CTXSW_PROG_GV11B_H */
|
||||
@@ -43,7 +43,6 @@
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/fecs_trace.h>
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
|
||||
struct gk20a_fecs_trace_hash_ent {
|
||||
@@ -62,29 +61,14 @@ struct gk20a_fecs_trace {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
|
||||
{
|
||||
return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
|
||||
}
|
||||
|
||||
u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
|
||||
{
|
||||
return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
|
||||
}
|
||||
|
||||
u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
|
||||
{
|
||||
return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
|
||||
}
|
||||
|
||||
static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
|
||||
{
|
||||
return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
|
||||
}
|
||||
|
||||
int gk20a_fecs_trace_num_ts(void)
|
||||
int gk20a_fecs_trace_num_ts(struct gk20a *g)
|
||||
{
|
||||
return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
|
||||
return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()
|
||||
- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
|
||||
}
|
||||
|
||||
@@ -94,18 +78,18 @@ struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
|
||||
struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
|
||||
|
||||
return (struct gk20a_fecs_trace_record *)
|
||||
((u8 *) mem->cpu_va
|
||||
+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
|
||||
((u8 *) mem->cpu_va +
|
||||
(idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()));
|
||||
}
|
||||
|
||||
bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
|
||||
bool gk20a_fecs_trace_is_valid_record(struct gk20a *g,
|
||||
struct gk20a_fecs_trace_record *r)
|
||||
{
|
||||
/*
|
||||
* testing magic_hi should suffice. magic_lo is sometimes used
|
||||
* as a sequence number in experimental ucode.
|
||||
*/
|
||||
return (r->magic_hi
|
||||
== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
|
||||
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
|
||||
}
|
||||
|
||||
int gk20a_fecs_trace_get_read_index(struct gk20a *g)
|
||||
@@ -254,7 +238,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"consuming record trace=%p read=%d record=%p", trace, index, r);
|
||||
|
||||
if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
|
||||
if (unlikely(!gk20a_fecs_trace_is_valid_record(g, r))) {
|
||||
nvgpu_warn(g,
|
||||
"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
|
||||
trace, index, r, r->magic_lo, r->magic_hi);
|
||||
@@ -278,10 +262,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
|
||||
entry.vmid = vmid;
|
||||
|
||||
/* break out FECS record into trace events */
|
||||
for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
|
||||
for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) {
|
||||
|
||||
entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
|
||||
entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
|
||||
entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
|
||||
entry.timestamp =
|
||||
g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
|
||||
entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
@@ -402,7 +387,7 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
|
||||
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
|
||||
{
|
||||
return GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
|
||||
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
|
||||
}
|
||||
|
||||
int gk20a_fecs_trace_init(struct gk20a *g)
|
||||
@@ -449,8 +434,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
|
||||
* in the context header.
|
||||
*/
|
||||
|
||||
u32 lo;
|
||||
u32 hi;
|
||||
u64 addr;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_mem *mem;
|
||||
@@ -475,37 +458,24 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
|
||||
} else {
|
||||
addr = nvgpu_inst_block_addr(g, mem);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
|
||||
aperture_mask = nvgpu_aperture_mask(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
|
||||
aperture_mask =
|
||||
g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem);
|
||||
}
|
||||
if (!addr)
|
||||
return -ENOMEM;
|
||||
|
||||
lo = u64_lo32(addr);
|
||||
hi = u64_hi32(addr);
|
||||
|
||||
mem = &gr_ctx->mem;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
|
||||
lo, GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
|
||||
GK20A_FECS_TRACE_NUM_RECORDS));
|
||||
g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
|
||||
mem = &ch->ctx_header;
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
|
||||
lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
|
||||
aperture_mask);
|
||||
g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask);
|
||||
|
||||
/* pid (process identifier) in user space, corresponds to tgid (thread
|
||||
* group id) in kernel space.
|
||||
@@ -573,7 +543,7 @@ int gk20a_gr_max_entries(struct gk20a *g,
|
||||
int tag;
|
||||
|
||||
/* Compute number of entries per record, with given filter */
|
||||
for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
|
||||
for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(g); tag++)
|
||||
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
|
||||
|
||||
/* Return max number of entries generated for the whole ring */
|
||||
|
||||
@@ -53,7 +53,6 @@
|
||||
#include "gr_pri_gk20a.h"
|
||||
#include "regops_gk20a.h"
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
|
||||
@@ -93,7 +92,7 @@ u32 gr_gk20a_get_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem)
|
||||
Flush and invalidate before cpu update. */
|
||||
g->ops.mm.l2_flush(g, true);
|
||||
|
||||
ctx_id = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o());
|
||||
ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, ctx_mem);
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", ctx_id);
|
||||
return ctx_id;
|
||||
}
|
||||
@@ -619,9 +618,8 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
|
||||
{
|
||||
if (update_patch_count) {
|
||||
/* reset patch count if ucode has already processed it */
|
||||
gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
|
||||
&gr_ctx->mem,
|
||||
ctxsw_prog_main_image_patch_count_o());
|
||||
gr_ctx->patch_ctx.data_count =
|
||||
g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem);
|
||||
nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
}
|
||||
@@ -634,8 +632,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
|
||||
{
|
||||
/* Write context count to context image if it is mapped */
|
||||
if (update_patch_count) {
|
||||
nvgpu_mem_wr(g, &gr_ctx->mem,
|
||||
ctxsw_prog_main_image_patch_count_o(),
|
||||
g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem,
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
nvgpu_log(g, gpu_dbg_info, "write patch count %d",
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
@@ -710,24 +707,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void gr_gk20a_write_zcull_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 va = u64_lo32(gpu_va >> 8);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_zcull_ptr_o(), va);
|
||||
}
|
||||
|
||||
void gr_gk20a_write_pm_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 va = u64_lo32(gpu_va >> 8);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_pm_ptr_o(), va);
|
||||
}
|
||||
|
||||
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
@@ -740,8 +719,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
|
||||
mem = &gr_ctx->mem;
|
||||
|
||||
if (gr_ctx->zcull_ctx.gpu_va == 0ULL &&
|
||||
gr_ctx->zcull_ctx.ctx_sw_mode ==
|
||||
ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
|
||||
g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer(
|
||||
gr_ctx->zcull_ctx.ctx_sw_mode)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -757,15 +736,14 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
|
||||
return ret;
|
||||
}
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_zcull_o(),
|
||||
gr_ctx->zcull_ctx.ctx_sw_mode);
|
||||
g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode);
|
||||
|
||||
if (ctxheader->gpu_va != 0ULL) {
|
||||
g->ops.gr.write_zcull_ptr(g, ctxheader,
|
||||
gr_ctx->zcull_ctx.gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
|
||||
gr_ctx->zcull_ctx.gpu_va);
|
||||
} else {
|
||||
g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem,
|
||||
gr_ctx->zcull_ctx.gpu_va);
|
||||
}
|
||||
|
||||
gk20a_enable_channel_tsg(g, c);
|
||||
@@ -1302,7 +1280,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
|
||||
u32 ctx_header_bytes = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
|
||||
u32 ctx_header_words;
|
||||
u32 i;
|
||||
u32 data;
|
||||
@@ -1497,10 +1475,9 @@ restore_fe_go_idle:
|
||||
data = nvgpu_mem_rd32(g, gr_mem, i);
|
||||
nvgpu_mem_wr32(g, gold_mem, i, data);
|
||||
}
|
||||
nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
|
||||
ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
|
||||
g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, gold_mem);
|
||||
|
||||
g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, gold_mem, 0);
|
||||
|
||||
err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
|
||||
if (err != 0) {
|
||||
@@ -1554,7 +1531,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
|
||||
struct tsg_gk20a *tsg;
|
||||
struct nvgpu_gr_ctx *gr_ctx = NULL;
|
||||
struct nvgpu_mem *mem = NULL;
|
||||
u32 data;
|
||||
int ret;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
@@ -1587,16 +1563,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
|
||||
Flush and invalidate before cpu update. */
|
||||
g->ops.mm.l2_flush(g, true);
|
||||
|
||||
data = nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_pm_o());
|
||||
|
||||
data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
|
||||
data |= enable_smpc_ctxsw ?
|
||||
ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
|
||||
ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_pm_o(), data);
|
||||
g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable_smpc_ctxsw);
|
||||
|
||||
out:
|
||||
gk20a_enable_channel_tsg(g, c);
|
||||
@@ -1612,7 +1579,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
struct nvgpu_mem *gr_mem = NULL;
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
struct pm_ctx_desc *pm_ctx;
|
||||
u32 data;
|
||||
u64 virt_addr = 0;
|
||||
struct nvgpu_mem *ctxheader = &c->ctx_header;
|
||||
int ret;
|
||||
@@ -1633,24 +1599,29 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
}
|
||||
|
||||
if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
|
||||
(g->ops.gr.get_hw_accessor_stream_out_mode == NULL)) {
|
||||
nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
|
||||
(g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw ==
|
||||
NULL)) {
|
||||
nvgpu_err(g,
|
||||
"Mode-E hwpm context switch mode is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
|
||||
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
|
||||
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
|
||||
if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
@@ -1711,37 +1682,34 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
}
|
||||
}
|
||||
|
||||
data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
|
||||
data = data & ~ctxsw_prog_main_image_pm_mode_m();
|
||||
|
||||
switch (mode) {
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
|
||||
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.set_pm_mode_ctxsw(g, gr_mem);
|
||||
virt_addr = pm_ctx->mem.gpu_va;
|
||||
break;
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
|
||||
pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.set_pm_mode_stream_out_ctxsw(g, gr_mem);
|
||||
virt_addr = pm_ctx->mem.gpu_va;
|
||||
break;
|
||||
case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
|
||||
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.set_pm_mode_no_ctxsw(g, gr_mem);
|
||||
virt_addr = 0;
|
||||
}
|
||||
|
||||
data |= pm_ctx->pm_mode;
|
||||
|
||||
nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
|
||||
|
||||
if (ctxheader->gpu_va != 0ULL) {
|
||||
struct channel_gk20a *ch;
|
||||
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
g->ops.gr.write_pm_ptr(g, &ch->ctx_header, virt_addr);
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, &ch->ctx_header,
|
||||
virt_addr);
|
||||
}
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
} else {
|
||||
g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, gr_mem, virt_addr);
|
||||
}
|
||||
|
||||
/* enable channel */
|
||||
@@ -1750,26 +1718,13 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_save_ops_o(), 0);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_restore_ops_o(), 0);
|
||||
}
|
||||
|
||||
/* load saved fresh copy of gloden image into channel gr_ctx */
|
||||
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
struct channel_gk20a *c,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
u32 virt_addr_lo;
|
||||
u32 virt_addr_hi;
|
||||
u64 virt_addr = 0;
|
||||
u32 v, data;
|
||||
int ret = 0;
|
||||
struct nvgpu_mem *mem;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
@@ -1787,8 +1742,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
gr->ctx_vars.local_golden_image,
|
||||
gr->ctx_vars.golden_image_size);
|
||||
|
||||
if (g->ops.gr.init_ctxsw_hdr_data != NULL) {
|
||||
g->ops.gr.init_ctxsw_hdr_data(g, mem);
|
||||
if (g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data != NULL) {
|
||||
g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem);
|
||||
}
|
||||
|
||||
if ((g->ops.gr.enable_cde_in_fecs != NULL) && c->cde) {
|
||||
@@ -1796,32 +1751,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
}
|
||||
|
||||
/* set priv access map */
|
||||
virt_addr_lo =
|
||||
u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
virt_addr_hi =
|
||||
u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
|
||||
if (g->allow_all) {
|
||||
data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
|
||||
} else {
|
||||
data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
|
||||
}
|
||||
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
|
||||
data);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
|
||||
virt_addr_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
|
||||
virt_addr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
|
||||
g->allow_all);
|
||||
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem,
|
||||
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
|
||||
/* disable verif features */
|
||||
v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
|
||||
v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
|
||||
v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
|
||||
g->ops.gr.ctxsw_prog.disable_verif_features(g, mem);
|
||||
|
||||
if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
|
||||
g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx, &c->ctx_header);
|
||||
@@ -1831,26 +1767,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
|
||||
}
|
||||
|
||||
virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_patch_adr_lo_o(),
|
||||
virt_addr_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_patch_adr_hi_o(),
|
||||
virt_addr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
|
||||
gr_ctx->patch_ctx.mem.gpu_va);
|
||||
|
||||
/* Update main header region of the context buffer with the info needed
|
||||
* for PM context switching, including mode and possibly a pointer to
|
||||
* the PM backing store.
|
||||
*/
|
||||
if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
|
||||
if (gr_ctx->pm_ctx.pm_mode !=
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
|
||||
if (gr_ctx->pm_ctx.mem.gpu_va == 0ULL) {
|
||||
nvgpu_err(g,
|
||||
"context switched pm with no pm buffer!");
|
||||
@@ -1862,15 +1791,10 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
|
||||
virt_addr = 0;
|
||||
}
|
||||
|
||||
data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
|
||||
data = data & ~ctxsw_prog_main_image_pm_mode_m();
|
||||
data |= gr_ctx->pm_ctx.pm_mode;
|
||||
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr);
|
||||
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
|
||||
|
||||
g->ops.gr.write_pm_ptr(g, mem, virt_addr);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
|
||||
@@ -2959,7 +2883,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
|
||||
}
|
||||
|
||||
/* PM ctxt switch is off by default */
|
||||
gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
|
||||
gr_ctx->pm_ctx.pm_mode =
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
|
||||
} else {
|
||||
/* commit gr ctx buffer */
|
||||
err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
|
||||
@@ -6654,8 +6579,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
|
||||
u32 num_tpc;
|
||||
u32 tpc, gpc, reg;
|
||||
u32 chk_addr;
|
||||
u32 vaddr_lo;
|
||||
u32 vaddr_hi;
|
||||
u32 tmp;
|
||||
u32 num_ovr_perf_regs = 0;
|
||||
u32 *ovr_perf_regs = NULL;
|
||||
@@ -6682,8 +6605,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
|
||||
/* reset the patch count from previous
|
||||
runs,if ucode has already processed
|
||||
it */
|
||||
tmp = nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_patch_count_o());
|
||||
tmp = g->ops.gr.ctxsw_prog.get_patch_count(g,
|
||||
mem);
|
||||
|
||||
if (tmp == 0U) {
|
||||
gr_ctx->patch_ctx.data_count = 0;
|
||||
@@ -6692,26 +6615,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
|
||||
gr_gk20a_ctx_patch_write(g, gr_ctx,
|
||||
addr, data, true);
|
||||
|
||||
vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_patch_count_o(),
|
||||
gr_ctx->patch_ctx.data_count);
|
||||
if (ctxheader->gpu_va != 0ULL) {
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_patch_adr_lo_o(),
|
||||
vaddr_lo);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_patch_adr_hi_o(),
|
||||
vaddr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g,
|
||||
ctxheader,
|
||||
gr_ctx->patch_ctx.mem.gpu_va);
|
||||
} else {
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_patch_adr_lo_o(),
|
||||
vaddr_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_patch_adr_hi_o(),
|
||||
vaddr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g,
|
||||
mem,
|
||||
gr_ctx->patch_ctx.mem.gpu_va);
|
||||
}
|
||||
|
||||
/* we're not caching these on cpu side,
|
||||
@@ -6726,24 +6640,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
|
||||
|
||||
#define ILLEGAL_ID ((u32)~0)
|
||||
|
||||
static inline bool check_main_image_header_magic(u8 *context)
|
||||
{
|
||||
u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
|
||||
return magic == ctxsw_prog_main_image_magic_value_v_value_v();
|
||||
}
|
||||
static inline bool check_local_header_magic(u8 *context)
|
||||
{
|
||||
u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
|
||||
return magic == ctxsw_prog_local_magic_value_v_value_v();
|
||||
|
||||
}
|
||||
|
||||
/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
|
||||
static inline u32 ctxsw_prog_ucode_header_size_in_bytes(void)
|
||||
{
|
||||
return 256U;
|
||||
}
|
||||
|
||||
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
|
||||
u32 **ovr_perf_regs)
|
||||
{
|
||||
@@ -6758,9 +6654,9 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
u32 context_buffer_size,
|
||||
u32 *priv_offset)
|
||||
{
|
||||
u32 i, data32;
|
||||
u32 i;
|
||||
u32 gpc_num, tpc_num;
|
||||
u32 num_gpcs, num_tpcs;
|
||||
u32 num_gpcs;
|
||||
u32 chk_addr;
|
||||
u32 ext_priv_offset, ext_priv_size;
|
||||
u8 *context;
|
||||
@@ -6809,18 +6705,18 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
|
||||
buffer_segments_size = g->ops.gr.ctxsw_prog.hw_get_extended_buffer_segments_size_in_bytes();
|
||||
/* note below is in words/num_registers */
|
||||
marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
|
||||
marker_size = g->ops.gr.ctxsw_prog.hw_extended_marker_size_in_bytes() >> 2;
|
||||
|
||||
context = (u8 *)context_buffer;
|
||||
/* sanity check main header */
|
||||
if (!check_main_image_header_magic(context)) {
|
||||
if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) {
|
||||
nvgpu_err(g,
|
||||
"Invalid main header: magic value");
|
||||
return -EINVAL;
|
||||
}
|
||||
num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
|
||||
num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context);
|
||||
if (gpc_num >= num_gpcs) {
|
||||
nvgpu_err(g,
|
||||
"GPC 0x%08x is greater than total count 0x%08x!",
|
||||
@@ -6828,21 +6724,20 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
|
||||
ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
|
||||
g->ops.gr.ctxsw_prog.get_extended_buffer_size_offset(context,
|
||||
&ext_priv_size, &ext_priv_offset);
|
||||
if (0U == ext_priv_size) {
|
||||
nvgpu_log_info(g, " No extended memory in context buffer");
|
||||
return -EINVAL;
|
||||
}
|
||||
ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
|
||||
|
||||
offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
|
||||
offset_to_segment = ext_priv_offset * 256U;
|
||||
offset_to_segment_end = offset_to_segment +
|
||||
(ext_priv_size * buffer_segments_size);
|
||||
|
||||
/* check local header magic */
|
||||
context += ctxsw_prog_ucode_header_size_in_bytes();
|
||||
if (!check_local_header_magic(context)) {
|
||||
context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
|
||||
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
|
||||
nvgpu_err(g,
|
||||
"Invalid local header: magic value");
|
||||
return -EINVAL;
|
||||
@@ -6937,8 +6832,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
|
||||
buffer_segments_size * gpc_num);
|
||||
|
||||
num_tpcs = g->gr.gpc_tpc_count[gpc_num];
|
||||
|
||||
/* skip the head marker to start with */
|
||||
inter_seg_offset = marker_size;
|
||||
|
||||
@@ -6949,23 +6842,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
|
||||
(tpc_num * control_register_stride) +
|
||||
sm_dsm_perf_ctrl_reg_id;
|
||||
} else {
|
||||
/* skip all the control registers */
|
||||
inter_seg_offset = inter_seg_offset +
|
||||
(num_tpcs * control_register_stride);
|
||||
|
||||
/* skip the marker between control and counter segments */
|
||||
inter_seg_offset += marker_size;
|
||||
|
||||
/* skip over counter regs of TPCs before the one we want */
|
||||
inter_seg_offset = inter_seg_offset +
|
||||
(tpc_num * perf_register_stride) *
|
||||
ctxsw_prog_extended_num_smpc_quadrants_v();
|
||||
|
||||
/* skip over the register for the quadrants we do not want.
|
||||
* then skip to the register in this tpc */
|
||||
inter_seg_offset = inter_seg_offset +
|
||||
(perf_register_stride * quad) +
|
||||
sm_dsm_perf_reg_id;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* set the offset to the segment offset plus the inter segment offset to
|
||||
@@ -7146,7 +7023,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
|
||||
u32 *num_ppcs, u32 *ppc_mask,
|
||||
u32 *reg_ppc_count)
|
||||
{
|
||||
u32 data32;
|
||||
u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
|
||||
|
||||
/*
|
||||
@@ -7159,11 +7035,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
|
||||
|
||||
*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
|
||||
*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
|
||||
|
||||
g->ops.gr.ctxsw_prog.get_ppc_info(context, num_ppcs, ppc_mask);
|
||||
*reg_ppc_count = g->netlist_vars->ctxsw_regs.ppc.count;
|
||||
|
||||
return 0;
|
||||
@@ -7242,7 +7114,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
u32 context_buffer_size,
|
||||
u32 *priv_offset)
|
||||
{
|
||||
u32 i, data32;
|
||||
u32 i;
|
||||
int err;
|
||||
enum ctxsw_addr_type addr_type;
|
||||
u32 broadcast_flags;
|
||||
@@ -7267,22 +7139,23 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
}
|
||||
|
||||
context = (u8 *)context_buffer;
|
||||
if (!check_main_image_header_magic(context)) {
|
||||
if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) {
|
||||
nvgpu_err(g,
|
||||
"Invalid main header: magic value");
|
||||
return -EINVAL;
|
||||
}
|
||||
num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
|
||||
num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context);
|
||||
|
||||
/* Parse the FECS local header. */
|
||||
context += ctxsw_prog_ucode_header_size_in_bytes();
|
||||
if (!check_local_header_magic(context)) {
|
||||
context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
|
||||
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
|
||||
nvgpu_err(g,
|
||||
"Invalid FECS local header: magic value");
|
||||
return -EINVAL;
|
||||
}
|
||||
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
|
||||
sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
|
||||
|
||||
sys_priv_offset =
|
||||
g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context);
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
|
||||
|
||||
/* If found in Ext buffer, ok.
|
||||
@@ -7302,8 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
|
||||
(addr_type == CTXSW_ADDR_TYPE_BE)) {
|
||||
/* Find the offset in the FECS segment. */
|
||||
offset_to_segment = sys_priv_offset *
|
||||
ctxsw_prog_ucode_header_size_in_bytes();
|
||||
offset_to_segment = sys_priv_offset * 256U;
|
||||
|
||||
err = gr_gk20a_process_context_buffer_priv_segment(g,
|
||||
addr_type, addr,
|
||||
@@ -7326,15 +7198,14 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
|
||||
/* Parse the GPCCS local header(s).*/
|
||||
for (i = 0; i < num_gpcs; i++) {
|
||||
context += ctxsw_prog_ucode_header_size_in_bytes();
|
||||
if (!check_local_header_magic(context)) {
|
||||
context += g->ops.gr.ctxsw_prog.hw_get_gpccs_header_size();
|
||||
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
|
||||
nvgpu_err(g,
|
||||
"Invalid GPCCS local header: magic value");
|
||||
return -EINVAL;
|
||||
|
||||
}
|
||||
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
|
||||
gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
|
||||
gpc_priv_offset = g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context);
|
||||
|
||||
err = gr_gk20a_determine_ppc_configuration(g, context,
|
||||
&num_ppcs, &ppc_mask,
|
||||
@@ -7345,7 +7216,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
}
|
||||
|
||||
|
||||
num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
|
||||
num_tpcs = g->ops.gr.ctxsw_prog.get_num_tpcs(context);
|
||||
|
||||
if ((i == gpc_num) && ((tpc_num + 1U) > num_tpcs)) {
|
||||
nvgpu_err(g,
|
||||
@@ -7359,8 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"gpc_priv_offset 0x%#08x",
|
||||
gpc_priv_offset);
|
||||
offset_to_segment = gpc_priv_offset *
|
||||
ctxsw_prog_ucode_header_size_in_bytes();
|
||||
offset_to_segment = gpc_priv_offset * 256U;
|
||||
|
||||
err = g->ops.gr.get_offset_in_gpccs_segment(g,
|
||||
addr_type,
|
||||
|
||||
@@ -738,12 +738,6 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g);
|
||||
|
||||
int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
|
||||
|
||||
void gr_gk20a_write_zcull_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
|
||||
void gr_gk20a_write_pm_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
|
||||
u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
|
||||
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
|
||||
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
@@ -751,8 +745,6 @@ void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
void gk20a_gr_init_ovr_sm_dsm_perf(void);
|
||||
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
|
||||
u32 **ovr_perf_regs);
|
||||
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
|
||||
struct nvgpu_mem *mem);
|
||||
u32 gr_gk20a_get_patch_slots(struct gk20a *g);
|
||||
int gk20a_gr_handle_notify_pending(struct gk20a *g,
|
||||
struct gr_gk20a_isr_data *isr_data);
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
|
||||
#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
|
||||
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
|
||||
#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
|
||||
#include <nvgpu/hw/gm20b/hw_perf_gm20b.h>
|
||||
|
||||
void gr_gm20b_init_gpc_mmu(struct gk20a *g)
|
||||
@@ -537,7 +536,7 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
||||
*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
|
||||
|
||||
*ctrl_register_stride =
|
||||
ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
|
||||
g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
|
||||
}
|
||||
|
||||
u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
|
||||
@@ -908,16 +907,11 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
|
||||
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
|
||||
{
|
||||
u32 cta_preempt_option =
|
||||
ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
|
||||
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
|
||||
nvgpu_mem_wr(g, &gr_ctx->mem,
|
||||
ctxsw_prog_main_image_preemption_options_o(),
|
||||
cta_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g,
|
||||
&gr_ctx->mem);
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
@@ -1069,7 +1063,6 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
||||
struct tsg_gk20a *tsg;
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
struct nvgpu_mem *mem;
|
||||
u32 v;
|
||||
|
||||
nvgpu_log_fn(c->g, " ");
|
||||
|
||||
@@ -1084,11 +1077,7 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
||||
v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
|
||||
v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
|
||||
v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
|
||||
nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
|
||||
c->g->ops.gr.ctxsw_prog.set_pc_sampling(c->g, mem, enable);
|
||||
|
||||
nvgpu_log_fn(c->g, "done");
|
||||
|
||||
@@ -1176,11 +1165,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g)
|
||||
|
||||
void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
|
||||
{
|
||||
u32 cde_v;
|
||||
|
||||
cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
|
||||
cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
|
||||
g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem);
|
||||
}
|
||||
|
||||
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
#include "common/ptimer/ptimer_gk20a.h"
|
||||
#include "common/fb/fb_gm20b.h"
|
||||
#include "common/netlist/netlist_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/ltc/ltc_gm20b.h"
|
||||
#include "common/fuse/fuse_gm20b.h"
|
||||
@@ -288,8 +289,6 @@ static const struct gpu_ops gm20b_ops = {
|
||||
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
|
||||
.commit_inst = gr_gk20a_commit_inst,
|
||||
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gk20a_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gm20b_load_tpc_mask,
|
||||
.trigger_suspend = gr_gk20a_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -312,7 +311,6 @@ static const struct gpu_ops gm20b_ops = {
|
||||
.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
|
||||
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
|
||||
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
|
||||
.init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data,
|
||||
.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
|
||||
.handle_notify_pending = gk20a_gr_handle_notify_pending,
|
||||
.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
|
||||
@@ -335,6 +333,72 @@ static const struct gpu_ops gm20b_ops = {
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.alloc_gfxp_rtv_cb = NULL,
|
||||
.commit_gfxp_rtv_cb = NULL,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.init_ctxsw_hdr_data = gm20b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gm20b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gm20b_fb_init_hw,
|
||||
@@ -695,6 +759,7 @@ int gm20b_init_hal(struct gk20a *g)
|
||||
gops->ltc = gm20b_ops.ltc;
|
||||
gops->ce2 = gm20b_ops.ce2;
|
||||
gops->gr = gm20b_ops.gr;
|
||||
gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog;
|
||||
gops->fb = gm20b_ops.fb;
|
||||
gops->clock_gating = gm20b_ops.clock_gating;
|
||||
gops->fifo = gm20b_ops.fifo;
|
||||
|
||||
@@ -33,6 +33,8 @@
|
||||
#include "common/fb/fb_gm20b.h"
|
||||
#include "common/fb/fb_gp106.h"
|
||||
#include "common/netlist/netlist_gp106.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/xve/xve_gp106.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp106.h"
|
||||
@@ -354,8 +356,6 @@ static const struct gpu_ops gp106_ops = {
|
||||
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
|
||||
.commit_inst = gr_gk20a_commit_inst,
|
||||
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gk20a_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gm20b_load_tpc_mask,
|
||||
.trigger_suspend = gr_gk20a_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -412,6 +412,81 @@ static const struct gpu_ops gp106_ops = {
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.alloc_gfxp_rtv_cb = NULL,
|
||||
.commit_gfxp_rtv_cb = NULL,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gp10b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gm20b_fb_init_hw,
|
||||
@@ -848,6 +923,7 @@ int gp106_init_hal(struct gk20a *g)
|
||||
gops->ltc = gp106_ops.ltc;
|
||||
gops->ce2 = gp106_ops.ce2;
|
||||
gops->gr = gp106_ops.gr;
|
||||
gops->gr.ctxsw_prog = gp106_ops.gr.ctxsw_prog;
|
||||
gops->fb = gp106_ops.fb;
|
||||
gops->clock_gating = gp106_ops.clock_gating;
|
||||
gops->fifo = gp106_ops.fifo;
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
|
||||
#include "fecs_trace_gp10b.h"
|
||||
|
||||
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
|
||||
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
|
||||
@@ -44,7 +44,6 @@
|
||||
|
||||
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
|
||||
#include <nvgpu/hw/gp10b/hw_fifo_gp10b.h>
|
||||
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
|
||||
|
||||
#define GFXP_WFI_TIMEOUT_COUNT_DEFAULT 100000U
|
||||
|
||||
@@ -1134,83 +1133,27 @@ fail_free_gk20a_ctx:
|
||||
void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
struct nvgpu_mem *mem = &gr_ctx->mem;
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_magic_value_o()),
|
||||
ctxsw_prog_main_image_magic_value_v_value_v());
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
|
||||
|
||||
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
|
||||
|
||||
nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_num_save_ops_o()));
|
||||
nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_num_wfi_save_ops_o()));
|
||||
nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_num_cta_save_ops_o()));
|
||||
nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_num_gfxp_save_ops_o()));
|
||||
nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_num_cilp_save_ops_o()));
|
||||
nvgpu_err(g,
|
||||
"image gfx preemption option (GFXP is 1) %x",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_graphics_preemption_options_o()));
|
||||
nvgpu_err(g,
|
||||
"image compute preemption option (CTA is 1) %x",
|
||||
nvgpu_mem_rd(g, mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o()));
|
||||
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem);
|
||||
}
|
||||
|
||||
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
|
||||
{
|
||||
struct nvgpu_mem *mem = &gr_ctx->mem;
|
||||
u32 gfxp_preempt_option =
|
||||
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
|
||||
u32 cilp_preempt_option =
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
|
||||
u32 cta_preempt_option =
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
|
||||
int err;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
||||
nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_graphics_preemption_options_o(),
|
||||
gfxp_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
|
||||
nvgpu_log_info(g, "CILP: %x", cilp_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
cilp_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
|
||||
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
cta_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
||||
@@ -2226,12 +2169,10 @@ enable_ch:
|
||||
}
|
||||
|
||||
void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
|
||||
struct nvgpu_gr_ctx *gr_ctx) {
|
||||
u32 v;
|
||||
|
||||
v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g, mem,
|
||||
gr_ctx->boosted_ctx);
|
||||
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
|
||||
}
|
||||
|
||||
int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
|
||||
@@ -2357,11 +2298,7 @@ int gr_gp10b_init_preemption_state(struct gk20a *g)
|
||||
void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 va = u64_lo32(gpu_va >> 8);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_full_preemption_ptr_o(), va);
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va);
|
||||
}
|
||||
|
||||
void gr_gp10b_init_czf_bypass(struct gk20a *g)
|
||||
@@ -2386,20 +2323,6 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
|
||||
return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
|
||||
}
|
||||
|
||||
void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem)
|
||||
{
|
||||
gk20a_gr_init_ctxsw_hdr_data(g, mem);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);
|
||||
}
|
||||
|
||||
void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
|
||||
@@ -147,7 +147,6 @@ void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
|
||||
void gr_gp10b_init_czf_bypass(struct gk20a *g);
|
||||
void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem);
|
||||
void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g);
|
||||
unsigned long gr_gp10b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
|
||||
bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
|
||||
|
||||
@@ -45,6 +45,8 @@
|
||||
#include "common/fb/fb_gm20b.h"
|
||||
#include "common/fb/fb_gp10b.h"
|
||||
#include "common/netlist/netlist_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
#include "common/ltc/ltc_gm20b.h"
|
||||
@@ -310,8 +312,6 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
|
||||
.commit_inst = gr_gk20a_commit_inst,
|
||||
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gk20a_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gm20b_load_tpc_mask,
|
||||
.trigger_suspend = gr_gk20a_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -345,7 +345,6 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
|
||||
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.init_ecc = gp10b_ecc_init,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gp10b_init_gfxp_wfi_timeout_count,
|
||||
.get_max_gfxp_wfi_timeout_count =
|
||||
@@ -373,6 +372,81 @@ static const struct gpu_ops gp10b_ops = {
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.alloc_gfxp_rtv_cb = NULL,
|
||||
.commit_gfxp_rtv_cb = NULL,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gp10b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gm20b_fb_init_hw,
|
||||
@@ -763,6 +837,7 @@ int gp10b_init_hal(struct gk20a *g)
|
||||
gops->ltc = gp10b_ops.ltc;
|
||||
gops->ce2 = gp10b_ops.ce2;
|
||||
gops->gr = gp10b_ops.gr;
|
||||
gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog;
|
||||
gops->fb = gp10b_ops.fb;
|
||||
gops->clock_gating = gp10b_ops.clock_gating;
|
||||
gops->fifo = gp10b_ops.fifo;
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#include <nvgpu/hw/gv100/hw_gr_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_top_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
|
||||
#include <nvgpu/hw/gv100/hw_perf_gv100.h>
|
||||
|
||||
|
||||
@@ -429,11 +428,6 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
|
||||
}
|
||||
}
|
||||
|
||||
u32 gr_gv100_get_hw_accessor_stream_out_mode(void)
|
||||
{
|
||||
return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
|
||||
}
|
||||
|
||||
void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
|
||||
u32 num_chiplets, u32 num_perfmons)
|
||||
{
|
||||
|
||||
@@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
|
||||
void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
|
||||
u32 num_fbpas,
|
||||
u32 *priv_addr_table, u32 *t);
|
||||
u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
|
||||
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
|
||||
void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
|
||||
u32 num_chiplets, u32 num_perfmons);
|
||||
|
||||
@@ -36,6 +36,9 @@
|
||||
#include "common/fb/fb_gv100.h"
|
||||
#include "common/xve/xve_gp106.h"
|
||||
#include "common/netlist/netlist_gv100.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp106.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
@@ -398,8 +401,6 @@ static const struct gpu_ops gv100_ops = {
|
||||
.enable_exceptions = gr_gv11b_enable_exceptions,
|
||||
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
|
||||
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
|
||||
.get_hw_accessor_stream_out_mode =
|
||||
gr_gv100_get_hw_accessor_stream_out_mode,
|
||||
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
|
||||
.set_pmm_register = gr_gv100_set_pmm_register,
|
||||
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
|
||||
@@ -416,8 +417,6 @@ static const struct gpu_ops gv100_ops = {
|
||||
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
|
||||
.commit_inst = gr_gv11b_commit_inst,
|
||||
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gv11b_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gv11b_load_tpc_mask,
|
||||
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -493,6 +492,93 @@ static const struct gpu_ops gv100_ops = {
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.alloc_gfxp_rtv_cb = NULL,
|
||||
.commit_gfxp_rtv_cb = NULL,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
|
||||
.set_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.set_full_preemption_ptr_veid0 =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
|
||||
.hw_get_perf_counter_register_stride =
|
||||
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
|
||||
.set_context_buffer_ptr =
|
||||
gv11b_ctxsw_prog_set_context_buffer_ptr,
|
||||
.set_type_per_veid_header =
|
||||
gv11b_ctxsw_prog_set_type_per_veid_header,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gv11b_fb_init_hw,
|
||||
@@ -1001,6 +1087,7 @@ int gv100_init_hal(struct gk20a *g)
|
||||
gops->ltc = gv100_ops.ltc;
|
||||
gops->ce2 = gv100_ops.ce2;
|
||||
gops->gr = gv100_ops.gr;
|
||||
gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog;
|
||||
gops->fb = gv100_ops.fb;
|
||||
gops->clock_gating = gv100_ops.clock_gating;
|
||||
gops->fifo = gv100_ops.fifo;
|
||||
|
||||
@@ -56,7 +56,6 @@
|
||||
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
|
||||
|
||||
@@ -1663,38 +1662,23 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
|
||||
{
|
||||
struct nvgpu_mem *mem = &gr_ctx->mem;
|
||||
u32 gfxp_preempt_option =
|
||||
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
|
||||
u32 cilp_preempt_option =
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
|
||||
u32 cta_preempt_option =
|
||||
ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
|
||||
int err;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (gr_ctx->graphics_preempt_mode ==
|
||||
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
|
||||
nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_graphics_preemption_options_o(),
|
||||
gfxp_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->compute_preempt_mode ==
|
||||
NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
|
||||
nvgpu_log_info(g, "CILP: %x", cilp_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
cilp_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->compute_preempt_mode ==
|
||||
NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
|
||||
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_compute_preemption_options_o(),
|
||||
cta_preempt_option);
|
||||
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
|
||||
}
|
||||
|
||||
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
|
||||
@@ -2947,35 +2931,6 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void gr_gv11b_write_zcull_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 va_lo, va_hi;
|
||||
|
||||
gpu_va = gpu_va >> 8;
|
||||
va_lo = u64_lo32(gpu_va);
|
||||
va_hi = u64_hi32(gpu_va);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_zcull_ptr_o(), va_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi);
|
||||
}
|
||||
|
||||
|
||||
void gr_gv11b_write_pm_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 va_lo, va_hi;
|
||||
|
||||
gpu_va = gpu_va >> 8;
|
||||
va_lo = u64_lo32(gpu_va);
|
||||
va_hi = u64_hi32(gpu_va);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_pm_ptr_o(), va_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi);
|
||||
}
|
||||
|
||||
void gr_gv11b_load_tpc_mask(struct gk20a *g)
|
||||
{
|
||||
u32 pes_tpc_mask = 0, fuse_tpc_mask;
|
||||
@@ -3009,25 +2964,9 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g)
|
||||
void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va)
|
||||
{
|
||||
u32 addr_lo, addr_hi;
|
||||
|
||||
/* gpu va still needs to be 8 bit aligned */
|
||||
gpu_va = gpu_va >> 8;
|
||||
|
||||
addr_lo = u64_lo32(gpu_va);
|
||||
addr_hi = u64_hi32(gpu_va);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
|
||||
addr_hi);
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, mem, gpu_va);
|
||||
}
|
||||
|
||||
int gr_gv11b_init_fs_state(struct gk20a *g)
|
||||
@@ -3936,7 +3875,7 @@ void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g,
|
||||
*num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
|
||||
*sm_dsm_perf_regs = _sm_dsm_perf_regs;
|
||||
*perf_register_stride =
|
||||
ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
|
||||
g->ops.gr.ctxsw_prog.hw_get_perf_counter_register_stride();
|
||||
}
|
||||
|
||||
void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
||||
@@ -3947,7 +3886,7 @@ void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
||||
*num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
|
||||
*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
|
||||
*ctrl_register_stride =
|
||||
ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
|
||||
g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
|
||||
}
|
||||
|
||||
void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
|
||||
|
||||
@@ -152,10 +152,6 @@ void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
|
||||
int gr_gv11b_load_smid_config(struct gk20a *g);
|
||||
int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va);
|
||||
int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
|
||||
void gr_gv11b_write_zcull_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
void gr_gv11b_write_pm_ptr(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
void gr_gv11b_load_tpc_mask(struct gk20a *g);
|
||||
void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
|
||||
@@ -35,6 +35,9 @@
|
||||
#include "common/fb/fb_gp10b.h"
|
||||
#include "common/fb/fb_gv11b.h"
|
||||
#include "common/netlist/netlist_gv11b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
#include "common/therm/therm_gv11b.h"
|
||||
@@ -350,8 +353,6 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.enable_exceptions = gr_gv11b_enable_exceptions,
|
||||
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
|
||||
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
|
||||
.get_hw_accessor_stream_out_mode =
|
||||
gr_gv100_get_hw_accessor_stream_out_mode,
|
||||
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
|
||||
.set_pmm_register = gr_gv100_set_pmm_register,
|
||||
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
|
||||
@@ -368,8 +369,6 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
|
||||
.commit_inst = gr_gv11b_commit_inst,
|
||||
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gv11b_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gv11b_load_tpc_mask,
|
||||
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -424,7 +423,6 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.handle_tpc_sm_ecc_exception =
|
||||
gr_gv11b_handle_tpc_sm_ecc_exception,
|
||||
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gv11b_init_gfxp_wfi_timeout_count,
|
||||
.get_max_gfxp_wfi_timeout_count =
|
||||
@@ -455,6 +453,93 @@ static const struct gpu_ops gv11b_ops = {
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.alloc_gfxp_rtv_cb = NULL,
|
||||
.commit_gfxp_rtv_cb = NULL,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
|
||||
.set_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.set_full_preemption_ptr_veid0 =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
|
||||
.hw_get_perf_counter_register_stride =
|
||||
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
|
||||
.set_context_buffer_ptr =
|
||||
gv11b_ctxsw_prog_set_context_buffer_ptr,
|
||||
.set_type_per_veid_header =
|
||||
gv11b_ctxsw_prog_set_type_per_veid_header,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gv11b_fb_init_hw,
|
||||
@@ -889,6 +974,7 @@ int gv11b_init_hal(struct gk20a *g)
|
||||
gops->ltc = gv11b_ops.ltc;
|
||||
gops->ce2 = gv11b_ops.ce2;
|
||||
gops->gr = gv11b_ops.gr;
|
||||
gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog;
|
||||
gops->fb = gv11b_ops.fb;
|
||||
gops->clock_gating = gv11b_ops.clock_gating;
|
||||
gops->fifo = gv11b_ops.fifo;
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include <nvgpu/channel.h>
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
|
||||
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
|
||||
|
||||
#include "gv11b/subctx_gv11b.h"
|
||||
@@ -65,7 +64,8 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
|
||||
nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
|
||||
|
||||
if (!nvgpu_mem_is_valid(ctxheader)) {
|
||||
ret = nvgpu_dma_alloc_sys(g, ctxsw_prog_fecs_header_v(),
|
||||
ret = nvgpu_dma_alloc_sys(g,
|
||||
g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
|
||||
ctxheader);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to allocate sub ctx header");
|
||||
@@ -100,7 +100,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
|
||||
struct nvgpu_mem *ctxheader = &c->ctx_header;
|
||||
struct gk20a *g = c->g;
|
||||
int ret = 0;
|
||||
u32 addr_lo, addr_hi;
|
||||
struct tsg_gk20a *tsg;
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
|
||||
@@ -114,38 +113,20 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
|
||||
g->ops.mm.l2_flush(g, true);
|
||||
|
||||
/* set priv access map */
|
||||
addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
|
||||
addr_lo);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
|
||||
addr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
|
||||
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
|
||||
|
||||
addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_patch_adr_lo_o(),
|
||||
addr_lo);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_patch_adr_hi_o(),
|
||||
addr_hi);
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
|
||||
gr_ctx->patch_ctx.mem.gpu_va);
|
||||
|
||||
g->ops.gr.write_pm_ptr(g, ctxheader, gr_ctx->pm_ctx.mem.gpu_va);
|
||||
g->ops.gr.write_zcull_ptr(g, ctxheader, gr_ctx->zcull_ctx.gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
|
||||
gr_ctx->pm_ctx.mem.gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
|
||||
gr_ctx->zcull_ctx.gpu_va);
|
||||
|
||||
addr_lo = u64_lo32(gpu_va);
|
||||
addr_hi = u64_hi32(gpu_va);
|
||||
g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va);
|
||||
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
|
||||
|
||||
nvgpu_mem_wr(g, ctxheader,
|
||||
ctxsw_prog_main_image_ctl_o(),
|
||||
ctxsw_prog_main_image_ctl_type_per_veid_header_v());
|
||||
g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -45,13 +45,11 @@ struct gk20a_fecs_trace_record {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void);
|
||||
u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts);
|
||||
u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts);
|
||||
int gk20a_fecs_trace_num_ts(void);
|
||||
int gk20a_fecs_trace_num_ts(struct gk20a *g);
|
||||
struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(struct gk20a *g,
|
||||
int idx);
|
||||
bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r);
|
||||
bool gk20a_fecs_trace_is_valid_record(struct gk20a *g,
|
||||
struct gk20a_fecs_trace_record *r);
|
||||
int gk20a_fecs_trace_get_read_index(struct gk20a *g);
|
||||
int gk20a_fecs_trace_get_write_index(struct gk20a *g);
|
||||
|
||||
|
||||
@@ -342,7 +342,6 @@ struct gpu_ops {
|
||||
int (*update_smpc_ctxsw_mode)(struct gk20a *g,
|
||||
struct channel_gk20a *c,
|
||||
bool enable);
|
||||
u32 (*get_hw_accessor_stream_out_mode)(void);
|
||||
int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
|
||||
struct channel_gk20a *c,
|
||||
u64 gpu_va,
|
||||
@@ -454,10 +453,6 @@ struct gpu_ops {
|
||||
int (*commit_global_timeslice)(struct gk20a *g,
|
||||
struct channel_gk20a *c);
|
||||
int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
|
||||
void (*write_zcull_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
void (*write_pm_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
void (*set_preemption_buffer_va)(struct gk20a *g,
|
||||
struct nvgpu_mem *mem, u64 gpu_va);
|
||||
void (*load_tpc_mask)(struct gk20a *g);
|
||||
@@ -479,8 +474,6 @@ struct gpu_ops {
|
||||
u32 gpc, u32 tpc, u32 sm);
|
||||
void (*resume_all_sms)(struct gk20a *g);
|
||||
void (*disable_rd_coalesce)(struct gk20a *g);
|
||||
void (*init_ctxsw_hdr_data)(struct gk20a *g,
|
||||
struct nvgpu_mem *mem);
|
||||
void (*init_gfxp_wfi_timeout_count)(struct gk20a *g);
|
||||
unsigned long (*get_max_gfxp_wfi_timeout_count)
|
||||
(struct gk20a *g);
|
||||
@@ -539,6 +532,96 @@ struct gpu_ops {
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
|
||||
void (*commit_gfxp_rtv_cb)(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, bool patch);
|
||||
struct {
|
||||
u32 (*hw_get_fecs_header_size)(void);
|
||||
u32 (*hw_get_gpccs_header_size)(void);
|
||||
u32 (*hw_get_extended_buffer_segments_size_in_bytes)(void);
|
||||
u32 (*hw_extended_marker_size_in_bytes)(void);
|
||||
u32 (*hw_get_perf_counter_control_register_stride)(void);
|
||||
u32 (*hw_get_perf_counter_register_stride)(void);
|
||||
u32 (*get_main_image_ctx_id)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 (*get_patch_count)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_patch_count)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 count);
|
||||
void (*set_patch_addr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_zcull_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_zcull)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 mode);
|
||||
void (*set_zcull_mode_no_ctxsw)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
bool (*is_zcull_mode_separate_buffer)(u32 mode);
|
||||
void (*set_pm_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_pm_mode)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 mode);
|
||||
void (*set_pm_smpc_mode)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool enable);
|
||||
u32 (*set_pm_mode_no_ctxsw)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 (*set_pm_mode_ctxsw)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 (*set_pm_mode_stream_out_ctxsw)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
u32 (*hw_get_pm_mode_no_ctxsw)(void);
|
||||
u32 (*hw_get_pm_mode_ctxsw)(void);
|
||||
u32 (*hw_get_pm_mode_stream_out_ctxsw)(void);
|
||||
void (*init_ctxsw_hdr_data)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_compute_preemption_mode_cta)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_compute_preemption_mode_cilp)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_graphics_preemption_mode_gfxp)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_cde_enabled)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_pc_sampling)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool enable);
|
||||
void (*set_priv_access_map_config_mode)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, bool allow_all);
|
||||
void (*set_priv_access_map_addr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*disable_verif_features)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
bool (*check_main_image_header_magic)(u8 *context);
|
||||
bool (*check_local_header_magic)(u8 *context);
|
||||
u32 (*get_num_gpcs)(u8 *context);
|
||||
u32 (*get_num_tpcs)(u8 *context);
|
||||
void (*get_extended_buffer_size_offset)(u8 *context,
|
||||
u32 *size, u32 *offset);
|
||||
void (*get_ppc_info)(u8 *context,
|
||||
u32 *num_ppcs, u32 *ppc_mask);
|
||||
u32 (*get_local_priv_register_ctl_offset)(u8 *context);
|
||||
u32 (*hw_get_ts_tag_invalid_timestamp)(void);
|
||||
u32 (*hw_get_ts_tag)(u64 ts);
|
||||
u64 (*hw_record_ts_timestamp)(u64 ts);
|
||||
u32 (*hw_get_ts_record_size_in_bytes)(void);
|
||||
u32 (*is_ts_valid_record)(u32 magic_hi);
|
||||
u32 (*get_ts_buffer_aperture_mask)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*set_ts_num_records)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 num);
|
||||
void (*set_ts_buffer_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr,
|
||||
u32 aperture_mask);
|
||||
void (*set_pmu_options_boost_clock_frequencies)(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u32 boosted_ctx);
|
||||
void (*set_context_buffer_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_full_preemption_ptr)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_full_preemption_ptr_veid0)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem, u64 addr);
|
||||
void (*set_type_per_veid_header)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
void (*dump_ctxsw_stats)(struct gk20a *g,
|
||||
struct nvgpu_mem *ctx_mem);
|
||||
} ctxsw_prog;
|
||||
} gr;
|
||||
struct {
|
||||
void (*init_hw)(struct gk20a *g);
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -118,6 +122,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -102,6 +106,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -106,6 +110,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
|
||||
{
|
||||
return 0x00000100U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
|
||||
{
|
||||
return 0x00000008U;
|
||||
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
|
||||
{
|
||||
return U32(0x7U) << 0U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
|
||||
{
|
||||
return 0x1U;
|
||||
}
|
||||
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
|
||||
{
|
||||
return 0x0U;
|
||||
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "os_linux.h"
|
||||
#include "ctxsw_trace.h"
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
|
||||
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
|
||||
|
||||
@@ -55,23 +55,24 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
|
||||
struct gk20a_fecs_trace_record *r =
|
||||
gk20a_fecs_trace_get_record(g, *pos);
|
||||
int i;
|
||||
const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v();
|
||||
const u32 invalid_tag =
|
||||
g->ops.gr.ctxsw_prog.hw_get_ts_tag_invalid_timestamp();
|
||||
u32 tag;
|
||||
u64 timestamp;
|
||||
|
||||
seq_printf(s, "record #%lld (%p)\n", *pos, r);
|
||||
seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
|
||||
seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
|
||||
if (gk20a_fecs_trace_is_valid_record(r)) {
|
||||
if (gk20a_fecs_trace_is_valid_record(g, r)) {
|
||||
seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
|
||||
seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
|
||||
seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
|
||||
seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
|
||||
for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
|
||||
tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
|
||||
for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) {
|
||||
tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
|
||||
if (tag == invalid_tag)
|
||||
continue;
|
||||
timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
|
||||
timestamp = g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
|
||||
timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
|
||||
seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
|
||||
}
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include "os_linux.h"
|
||||
#include "ioctl_tsg.h"
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
|
||||
ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
|
||||
|
||||
@@ -38,6 +38,9 @@
|
||||
#include "common/fb/fb_tu104.h"
|
||||
#include "common/xve/xve_gp106.h"
|
||||
#include "common/netlist/netlist_tu104.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
#include "common/therm/therm_gp106.h"
|
||||
@@ -412,8 +415,6 @@ static const struct gpu_ops tu104_ops = {
|
||||
.enable_exceptions = gr_gv11b_enable_exceptions,
|
||||
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
|
||||
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
|
||||
.get_hw_accessor_stream_out_mode =
|
||||
gr_gv100_get_hw_accessor_stream_out_mode,
|
||||
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
|
||||
.set_pmm_register = gr_gv100_set_pmm_register,
|
||||
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
|
||||
@@ -430,8 +431,6 @@ static const struct gpu_ops tu104_ops = {
|
||||
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
|
||||
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
|
||||
.commit_inst = gr_gv11b_commit_inst,
|
||||
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gv11b_write_pm_ptr,
|
||||
.load_tpc_mask = gr_gv11b_load_tpc_mask,
|
||||
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -488,7 +487,6 @@ static const struct gpu_ops tu104_ops = {
|
||||
.handle_tpc_sm_ecc_exception =
|
||||
gr_gv11b_handle_tpc_sm_ecc_exception,
|
||||
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gv11b_init_gfxp_wfi_timeout_count,
|
||||
.get_max_gfxp_wfi_timeout_count =
|
||||
@@ -517,6 +515,93 @@ static const struct gpu_ops tu104_ops = {
|
||||
.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
|
||||
.get_fecs_ctx_state_store_major_rev_id =
|
||||
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
|
||||
.set_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gm20b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.set_full_preemption_ptr_veid0 =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
|
||||
.hw_get_perf_counter_register_stride =
|
||||
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
|
||||
.set_context_buffer_ptr =
|
||||
gv11b_ctxsw_prog_set_context_buffer_ptr,
|
||||
.set_type_per_veid_header =
|
||||
gv11b_ctxsw_prog_set_type_per_veid_header,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = gv11b_fb_init_hw,
|
||||
@@ -1027,6 +1112,7 @@ int tu104_init_hal(struct gk20a *g)
|
||||
gops->ltc = tu104_ops.ltc;
|
||||
gops->ce2 = tu104_ops.ce2;
|
||||
gops->gr = tu104_ops.gr;
|
||||
gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog;
|
||||
gops->fb = tu104_ops.fb;
|
||||
gops->clock_gating = tu104_ops.clock_gating;
|
||||
gops->fifo = tu104_ops.fifo;
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
#include "common/fb/fb_gm20b.h"
|
||||
#include "common/fb/fb_gp10b.h"
|
||||
#include "common/netlist/netlist_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
#include "common/ltc/ltc_gm20b.h"
|
||||
@@ -176,8 +178,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
||||
.program_zcull_mapping = NULL,
|
||||
.commit_global_timeslice = NULL,
|
||||
.commit_inst = vgpu_gr_commit_inst,
|
||||
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gk20a_write_pm_ptr,
|
||||
.load_tpc_mask = NULL,
|
||||
.trigger_suspend = NULL,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -212,7 +212,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
||||
.set_bes_crop_debug4 = NULL,
|
||||
.set_ctxsw_preemption_mode =
|
||||
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gp10b_init_gfxp_wfi_timeout_count,
|
||||
.get_max_gfxp_wfi_timeout_count =
|
||||
@@ -230,6 +229,81 @@ static const struct gpu_ops vgpu_gp10b_ops = {
|
||||
.get_offset_in_gpccs_segment =
|
||||
gr_gk20a_get_offset_in_gpccs_segment,
|
||||
.set_debug_mode = gm20b_gr_set_debug_mode,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gp10b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = NULL,
|
||||
@@ -575,6 +649,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g)
|
||||
gops->ltc = vgpu_gp10b_ops.ltc;
|
||||
gops->ce2 = vgpu_gp10b_ops.ce2;
|
||||
gops->gr = vgpu_gp10b_ops.gr;
|
||||
gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog;
|
||||
gops->fb = vgpu_gp10b_ops.fb;
|
||||
gops->clock_gating = vgpu_gp10b_ops.clock_gating;
|
||||
gops->fifo = vgpu_gp10b_ops.fifo;
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
#include "gk20a/fecs_trace_gk20a.h"
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
|
||||
|
||||
void vgpu_gr_detect_sm_arch(struct gk20a *g)
|
||||
{
|
||||
@@ -614,7 +613,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
|
||||
}
|
||||
|
||||
/* PM ctxt switch is off by default */
|
||||
gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
|
||||
gr_ctx->pm_ctx.pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
@@ -1087,18 +1086,21 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
* will return an error due to using the same GPU VA twice.
|
||||
*/
|
||||
|
||||
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
|
||||
} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
|
||||
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
|
||||
} else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
|
||||
(g->ops.gr.get_hw_accessor_stream_out_mode)){
|
||||
if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
|
||||
if (pm_ctx->pm_mode ==
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
|
||||
return 0;
|
||||
}
|
||||
p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW;
|
||||
@@ -1130,11 +1132,14 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
|
||||
err = err ? err : msg.ret;
|
||||
if (!err) {
|
||||
if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
|
||||
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw();
|
||||
} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
|
||||
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
|
||||
} else {
|
||||
pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
|
||||
pm_ctx->pm_mode =
|
||||
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,9 @@
|
||||
#include "common/fb/fb_gp10b.h"
|
||||
#include "common/fb/fb_gv11b.h"
|
||||
#include "common/netlist/netlist_gv11b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
|
||||
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
|
||||
#include "common/therm/therm_gm20b.h"
|
||||
#include "common/therm/therm_gp10b.h"
|
||||
#include "common/therm/therm_gv11b.h"
|
||||
@@ -177,8 +180,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.enable_exceptions = NULL,
|
||||
.get_lrf_tex_ltc_dram_override = NULL,
|
||||
.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode,
|
||||
.get_hw_accessor_stream_out_mode =
|
||||
gr_gv100_get_hw_accessor_stream_out_mode,
|
||||
.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode,
|
||||
.record_sm_error_state = gv11b_gr_record_sm_error_state,
|
||||
.clear_sm_error_state = vgpu_gr_clear_sm_error_state,
|
||||
@@ -192,8 +193,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.program_zcull_mapping = NULL,
|
||||
.commit_global_timeslice = NULL,
|
||||
.commit_inst = vgpu_gr_gv11b_commit_inst,
|
||||
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
|
||||
.write_pm_ptr = gr_gv11b_write_pm_ptr,
|
||||
.load_tpc_mask = NULL,
|
||||
.trigger_suspend = NULL,
|
||||
.wait_for_pause = gr_gk20a_wait_for_pause,
|
||||
@@ -247,7 +246,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.handle_tpc_sm_ecc_exception =
|
||||
gr_gv11b_handle_tpc_sm_ecc_exception,
|
||||
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
|
||||
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
|
||||
.init_gfxp_wfi_timeout_count =
|
||||
gr_gv11b_init_gfxp_wfi_timeout_count,
|
||||
.get_max_gfxp_wfi_timeout_count =
|
||||
@@ -265,6 +263,93 @@ static const struct gpu_ops vgpu_gv11b_ops = {
|
||||
.get_offset_in_gpccs_segment =
|
||||
gr_gk20a_get_offset_in_gpccs_segment,
|
||||
.set_debug_mode = gm20b_gr_set_debug_mode,
|
||||
.ctxsw_prog = {
|
||||
.hw_get_fecs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_fecs_header_size,
|
||||
.hw_get_gpccs_header_size =
|
||||
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
|
||||
.hw_get_extended_buffer_segments_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
|
||||
.hw_extended_marker_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
|
||||
.hw_get_perf_counter_control_register_stride =
|
||||
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
|
||||
.get_main_image_ctx_id =
|
||||
gm20b_ctxsw_prog_get_main_image_ctx_id,
|
||||
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
|
||||
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
|
||||
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
|
||||
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
|
||||
.set_zcull = gm20b_ctxsw_prog_set_zcull,
|
||||
.set_zcull_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
|
||||
.is_zcull_mode_separate_buffer =
|
||||
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
|
||||
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
|
||||
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
|
||||
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
|
||||
.set_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
|
||||
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_no_ctxsw =
|
||||
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
|
||||
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
|
||||
.hw_get_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
|
||||
.set_pm_mode_stream_out_ctxsw =
|
||||
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
|
||||
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
|
||||
.set_compute_preemption_mode_cta =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
|
||||
.set_compute_preemption_mode_cilp =
|
||||
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
|
||||
.set_graphics_preemption_mode_gfxp =
|
||||
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
|
||||
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
|
||||
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
|
||||
.set_priv_access_map_config_mode =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
|
||||
.set_priv_access_map_addr =
|
||||
gm20b_ctxsw_prog_set_priv_access_map_addr,
|
||||
.disable_verif_features =
|
||||
gm20b_ctxsw_prog_disable_verif_features,
|
||||
.check_main_image_header_magic =
|
||||
gm20b_ctxsw_prog_check_main_image_header_magic,
|
||||
.check_local_header_magic =
|
||||
gm20b_ctxsw_prog_check_local_header_magic,
|
||||
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
|
||||
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
|
||||
.get_extended_buffer_size_offset =
|
||||
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
|
||||
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
|
||||
.get_local_priv_register_ctl_offset =
|
||||
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
|
||||
.hw_get_ts_tag_invalid_timestamp =
|
||||
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
|
||||
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
|
||||
.hw_record_ts_timestamp =
|
||||
gm20b_ctxsw_prog_hw_record_ts_timestamp,
|
||||
.hw_get_ts_record_size_in_bytes =
|
||||
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
|
||||
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
|
||||
.get_ts_buffer_aperture_mask =
|
||||
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
|
||||
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
|
||||
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
|
||||
.set_pmu_options_boost_clock_frequencies =
|
||||
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
|
||||
.set_full_preemption_ptr =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr,
|
||||
.set_full_preemption_ptr_veid0 =
|
||||
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
|
||||
.hw_get_perf_counter_register_stride =
|
||||
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
|
||||
.set_context_buffer_ptr =
|
||||
gv11b_ctxsw_prog_set_context_buffer_ptr,
|
||||
.set_type_per_veid_header =
|
||||
gv11b_ctxsw_prog_set_type_per_veid_header,
|
||||
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
|
||||
}
|
||||
},
|
||||
.fb = {
|
||||
.init_hw = NULL,
|
||||
@@ -640,6 +725,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
|
||||
gops->ltc = vgpu_gv11b_ops.ltc;
|
||||
gops->ce2 = vgpu_gv11b_ops.ce2;
|
||||
gops->gr = vgpu_gv11b_ops.gr;
|
||||
gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog;
|
||||
gops->fb = vgpu_gv11b_ops.fb;
|
||||
gops->clock_gating = vgpu_gv11b_ops.clock_gating;
|
||||
gops->fifo = vgpu_gv11b_ops.fifo;
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/channel.h>
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
|
||||
|
||||
int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
|
||||
{
|
||||
@@ -41,8 +40,8 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
|
||||
msg.handle = vgpu_get_handle(c->g);
|
||||
p->ch_handle = c->virt_ctx;
|
||||
p->ctx_header_va = __nvgpu_vm_alloc_va(c->vm,
|
||||
ctxsw_prog_fecs_header_v(),
|
||||
GMMU_PAGE_SIZE_KERNEL);
|
||||
c->g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
|
||||
GMMU_PAGE_SIZE_KERNEL);
|
||||
if (!p->ctx_header_va) {
|
||||
nvgpu_err(c->g, "alloc va failed for ctx_header");
|
||||
return -ENOMEM;
|
||||
|
||||
Reference in New Issue
Block a user