gpu: nvgpu: add separate unit for gr/ctxsw_prog

Add separate new unit gr/ctxsw_prog that provides interface to access
h/w header files hw_ctxsw_prog_*.h

Add the following chip-specific files that access the above h/w unit and
provide an interface through the g->ops.gr.ctxsw_prog.*() HALs for the rest
of the units:

common/gr/ctxsw_prog/ctxsw_prog_gm20b.c
common/gr/ctxsw_prog/ctxsw_prog_gp10b.c
common/gr/ctxsw_prog/ctxsw_prog_gv11b.c

Remove all the h/w header includes from rest of the units and code.
Remove direct calls to h/w headers ctxsw_prog_*() and use HALs
g->ops.gr.ctxsw_prog.*() instead

In gr_gk20a_find_priv_offset_in_ext_buffer(), the h/w header accessor
ctxsw_prog_extended_num_smpc_quadrants_v() is only defined on gk20a.
Since gk20a is no longer supported, remove the corresponding code.

Add missing h/w header ctxsw_prog_main_image_pm_mode_ctxsw_f() for
some chips
Add new h/w header ctxsw_prog_gpccs_header_stride_v()

Jira NVGPU-1526

Change-Id: I170f5c0da26ada833f94f5479ff299c0db56a732
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1966111
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2018-11-30 17:19:50 +05:30
committed by mobile promotions
parent 8ef20036c7
commit 6777bd5ed2
41 changed files with 1748 additions and 556 deletions

View File

@@ -68,6 +68,9 @@ nvgpu-y += common/bus/bus_gk20a.o \
common/ltc/ltc_gp10b.o \
common/ltc/ltc_gv11b.o \
common/ltc/ltc_tu104.o \
common/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
common/gr/ctxsw_prog/ctxsw_prog_gp10b.o \
common/gr/ctxsw_prog/ctxsw_prog_gv11b.o \
common/netlist/netlist.o \
common/netlist/netlist_sim.o \
common/netlist/netlist_gm20b.o \

View File

@@ -106,6 +106,9 @@ srcs := os/posix/nvgpu.c \
common/falcon/falcon_gp106.c \
common/falcon/falcon_gv100.c \
common/falcon/falcon_tu104.c \
common/gr/ctxsw_prog/ctxsw_prog_gm20b.c \
common/gr/ctxsw_prog/ctxsw_prog_gp10b.c \
common/gr/ctxsw_prog/ctxsw_prog_gv11b.c \
common/netlist/netlist.c \
common/netlist/netlist_sim.c \
common/netlist/netlist_gm20b.c \

View File

@@ -0,0 +1,345 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/utils.h>
#include <nvgpu/nvgpu_mem.h>
#include "ctxsw_prog_gm20b.h"
#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
/* HW-defined size of the FECS ctxsw header, from the gm20b hw headers. */
u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void)
{
	return ctxsw_prog_fecs_header_v();
}
/* HW-defined per-GPC stride of the GPCCS ctxsw header. */
u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void)
{
	return ctxsw_prog_gpccs_header_stride_v();
}
/* Segment size (bytes) used when sizing the extended ctxsw buffer. */
u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void)
{
	return ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
}
/* Size (bytes) of one marker record in the extended ctxsw buffer. */
u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void)
{
	return ctxsw_prog_extended_marker_size_in_bytes_v();
}
/* Stride between SM DSM perf-counter control registers in the extended buffer. */
u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void)
{
	return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
}
/* Read the context id field out of the main ctxsw image in @ctx_mem. */
u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o());
}
/* Read the patch-buffer entry count from the main ctxsw image. */
u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem)
{
	return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_patch_count_o());
}
/* Write the patch-buffer entry count into the main ctxsw image. */
void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 count)
{
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_count_o(), count);
}
/*
 * Program the 64-bit patch-buffer GPU VA into the main ctxsw image as a
 * lo/hi register pair.
 */
void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u32 addr_lo = u64_lo32(addr);
	u32 addr_hi = u64_hi32(addr);

	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_adr_lo_o(),
		addr_lo);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_patch_adr_hi_o(),
		addr_hi);
}
/*
 * Program the zcull buffer pointer (address stored as addr >> 8, i.e.
 * 256-byte units). Only the low 32 bits are written on gm20b; gv11b adds
 * a hi word.
 */
void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(),
		u64_lo32(addr));
}
/* Write the raw zcull ctxsw mode word into the main ctxsw image. */
void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u32 mode)
{
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_o(), mode);
}
/* Disable zcull context switching for this context. */
void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_o(),
		ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
}
/* True iff @mode selects the separate-buffer zcull ctxsw mode. */
bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode)
{
	if (mode == ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
		return true;
	}
	return false;
}
/*
 * Program the perfmon buffer pointer (stored as addr >> 8, 256-byte units).
 * Only the low 32 bits are written on gm20b; gv11b adds a hi word.
 */
void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
		u64_lo32(addr));
}
/*
 * Read-modify-write the PM mode field of the main ctxsw image: clear the
 * mode bits and OR in @mode (expected to be a pm_mode_*_f() value).
 */
void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 mode)
{
	u32 val = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());

	val = (val & ~ctxsw_prog_main_image_pm_mode_m()) | mode;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), val);
}
/* Enable or disable SMPC context switching in the main ctxsw image. */
void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable)
{
	u32 smpc_f;
	u32 val = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());

	if (enable) {
		smpc_f = ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f();
	} else {
		smpc_f = ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
	}
	val = (val & ~ctxsw_prog_main_image_pm_smpc_mode_m()) | smpc_f;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), val);
}
/* Set PM mode to no_ctxsw; returns the mode value that was programmed. */
u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, pm_mode);
	return pm_mode;
}
/* Set PM mode to ctxsw; returns the mode value that was programmed. */
u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, pm_mode);
	return pm_mode;
}
/* HW field value for PM mode "no ctxsw". */
u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void)
{
	return ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
}
/* HW field value for PM mode "ctxsw". */
u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void)
{
	return ctxsw_prog_main_image_pm_mode_ctxsw_f();
}
/* Zero the save/restore operation counters in the main ctxsw header. */
void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_restore_ops_o(), 0);
}
/* Select CTA compute preemption via the preemption_options word (gm20b layout). */
void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_preemption_options_o(),
		ctxsw_prog_main_image_preemption_options_control_cta_enabled_f());
}
/* Set the CDE-enabled bit in the main ctxsw image ctl word (RMW). */
void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 ctl = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o());

	ctl |= ctxsw_prog_main_image_ctl_cde_enabled_f();
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), ctl);
}
/* Enable/disable PC sampling by rewriting the pm word's pc_sampling field. */
void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable)
{
	u32 pm = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());

	pm = (pm & ~ctxsw_prog_main_image_pm_pc_sampling_m()) |
		ctxsw_prog_main_image_pm_pc_sampling_f(enable);
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), pm);
}
/*
 * Configure the priv access map: either allow all priv accesses or enforce
 * the access map programmed via the map buffer.
 */
void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool allow_all)
{
	u32 cfg = allow_all ?
		ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f() :
		ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_config_o(), cfg);
}
/* Program the 64-bit priv-access-map buffer address as a lo/hi pair. */
void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	u32 addr_lo = u64_lo32(addr);
	u32 addr_hi = u64_hi32(addr);

	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_addr_lo_o(), addr_lo);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_priv_access_map_addr_hi_o(), addr_hi);
}
/* Clear the verification-features field in misc_options (RMW). */
void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 opts = nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_misc_options_o());

	opts &= ~ctxsw_prog_main_image_misc_options_verif_features_m();
	opts |= ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_misc_options_o(), opts);
}
/*
 * Check the magic value at the start of a CPU-mapped main ctxsw image.
 * NOTE(review): reads via a u32 cast; assumes @context is 4-byte aligned
 * (hw offsets are word offsets) — confirm at callers.
 */
bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context)
{
	u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());

	return magic == ctxsw_prog_main_image_magic_value_v_value_v();
}
/* Check the magic value of a CPU-mapped local (per-engine) ctxsw header. */
bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context)
{
	u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());

	return magic == ctxsw_prog_local_magic_value_v_value_v();
}
/* Number of GPCs recorded in a CPU-mapped main ctxsw image. */
u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context)
{
	return *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
}
/* Number of TPCs recorded in a CPU-mapped local ctxsw header. */
u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context)
{
	return *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
}
/* Decode extended-buffer size and offset fields from the main image ctl word. */
void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context,
	u32 *size, u32 *offset)
{
	u32 data = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());

	*size = ctxsw_prog_main_extended_buffer_ctl_size_v(data);
	*offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data);
}
/* Decode PPC count and mask from the local ctxsw header's ppc_info word. */
void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask)
{
	u32 data = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());

	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data);
	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data);
}
/* Offset field of the local priv-register ctl word in a CPU-mapped header. */
u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context)
{
	u32 data = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());

	return ctxsw_prog_local_priv_register_ctl_offset_v(data);
}
/* Tag value marking an invalid FECS trace timestamp record. */
u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
}
/* Extract the tag field from the high 32 bits of a 64-bit timestamp record. */
u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}
/* Strip the tag bits from a 64-bit record, leaving the raw timestamp. */
u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts)
{
	return ts &
		~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}
/* Size (bytes) of one FECS trace timestamp record. */
u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void)
{
	return ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}
/*
 * Non-zero iff @magic_hi matches the expected trace-record magic.
 * Returns u32 (not bool) to match the HAL signature.
 */
u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi)
{
	return magic_hi ==
		ctxsw_prog_record_timestamp_magic_value_hi_v_value_v();
}
/*
 * Aperture bits (sysmem coherent/non-coherent or vidmem) for the FECS
 * trace timestamp buffer, chosen from @ctx_mem's aperture.
 */
u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	return nvgpu_aperture_mask(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
}
/* Program the number of records in the FECS trace timestamp buffer. */
void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 num)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(num));
}
/*
 * Program the FECS trace timestamp buffer pointer: low word first, then
 * the high word combined with the aperture target bits.
 */
void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		u64_lo32(addr));
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(u64_hi32(addr)) |
		aperture_mask);
}

View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_CTXSW_PROG_GM20B_H
#define NVGPU_CTXSW_PROG_GM20B_H

/*
 * gm20b implementations of the g->ops.gr.ctxsw_prog HAL: accessors for the
 * ctxsw_prog image/header layout defined by the gm20b hw headers.
 */

#include <nvgpu/types.h>

struct gk20a;
struct nvgpu_mem;

u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void);
u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void);
u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void);
u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void);
u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void);
u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem);
u32 gm20b_ctxsw_prog_get_patch_count(struct gk20a *g, struct nvgpu_mem *ctx_mem);
void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 count);
void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr);
void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u32 mode);
void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode);
void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr);
void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 mode);
void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable);
u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void);
u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void);
void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gm20b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool enable);
void gm20b_ctxsw_prog_set_priv_access_map_config_mode(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, bool allow_all);
void gm20b_ctxsw_prog_set_priv_access_map_addr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
void gm20b_ctxsw_prog_disable_verif_features(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
/* The u8 *context accessors below operate on CPU-mapped ctxsw images. */
bool gm20b_ctxsw_prog_check_main_image_header_magic(u8 *context);
bool gm20b_ctxsw_prog_check_local_header_magic(u8 *context);
u32 gm20b_ctxsw_prog_get_num_gpcs(u8 *context);
u32 gm20b_ctxsw_prog_get_num_tpcs(u8 *context);
void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u8 *context,
	u32 *size, u32 *offset);
void gm20b_ctxsw_prog_get_ppc_info(u8 *context, u32 *num_ppcs, u32 *ppc_mask);
u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u8 *context);
/* FECS trace timestamp record helpers. */
u32 gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp(void);
u32 gm20b_ctxsw_prog_hw_get_ts_tag(u64 ts);
u64 gm20b_ctxsw_prog_hw_record_ts_timestamp(u64 ts);
u32 gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes(void);
u32 gm20b_ctxsw_prog_is_ts_valid_record(u32 magic_hi);
u32 gm20b_ctxsw_prog_get_ts_buffer_aperture_mask(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gm20b_ctxsw_prog_set_ts_num_records(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 num);
void gm20b_ctxsw_prog_set_ts_buffer_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr, u32 aperture_mask);

#endif /* NVGPU_CTXSW_PROG_GM20B_H */

View File

@@ -0,0 +1,129 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/utils.h>
#include <nvgpu/nvgpu_mem.h>
#include "ctxsw_prog_gm20b.h"
#include "ctxsw_prog_gp10b.h"
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
/* Select GfxP graphics preemption in the gp10b preemption_options word. */
void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_graphics_preemption_options_o(),
		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f());
}
/* Select CTA compute preemption (gp10b uses a separate compute options word). */
void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_compute_preemption_options_o(),
		ctxsw_prog_main_image_compute_preemption_options_control_cta_f());
}
/* Select CILP compute preemption in the compute options word. */
void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_compute_preemption_options_o(),
		ctxsw_prog_main_image_compute_preemption_options_control_cilp_f());
}
/* Write the boosted-context flag into the pmu_options word. */
void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 boosted_ctx)
{
	u32 data = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(boosted_ctx);

	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pmu_options_o(), data);
}
/*
 * Program the GfxP full-preemption buffer pointer (stored as addr >> 8).
 * Only the low 32 bits are written on gp10b; gv11b adds a hi word.
 */
void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_o(), u64_lo32(addr));
}
/*
 * Zero the gp10b-specific save-op counters (wfi/cta/gfxp/cilp), then chain
 * to the gm20b init for the common save/restore counters.
 */
void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);
	gm20b_ctxsw_prog_init_ctxsw_hdr_data(g, ctx_mem);
}
/*
 * Debug dump: log the key ctxsw image header words (magic, trace buffer
 * pointers/control, save-op counters, preemption options) via nvgpu_err.
 * Read-only; does not modify the context image.
 */
void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_magic_value_o()),
		ctxsw_prog_main_image_magic_value_v_value_v());
	nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
	nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
	nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
	nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_num_save_ops_o()));
	nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_num_wfi_save_ops_o()));
	nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_num_cta_save_ops_o()));
	nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_num_gfxp_save_ops_o()));
	nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_num_cilp_save_ops_o()));
	nvgpu_err(g,
		"image gfx preemption option (GFXP is 1) %x",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_graphics_preemption_options_o()));
	nvgpu_err(g,
		"image compute preemption option (CTA is 1) %x",
		nvgpu_mem_rd(g, ctx_mem,
			ctxsw_prog_main_image_compute_preemption_options_o()));
}

View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_CTXSW_PROG_GP10B_H
#define NVGPU_CTXSW_PROG_GP10B_H

/*
 * gp10b additions to the gr.ctxsw_prog HAL: preemption-mode programming,
 * PMU boost options, full-preemption pointer and ctxsw stats dump.
 */

#include <nvgpu/types.h>

struct gk20a;
struct nvgpu_mem;

void gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gp10b_ctxsw_prog_set_compute_preemption_mode_cta(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gp10b_ctxsw_prog_set_compute_preemption_mode_cilp(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u32 boosted_ctx);
void gp10b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
void gp10b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gp10b_ctxsw_prog_dump_ctxsw_stats(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);

#endif /* NVGPU_CTXSW_PROG_GP10B_H */

View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/utils.h>
#include <nvgpu/nvgpu_mem.h>
#include "ctxsw_prog_gm20b.h"
#include "ctxsw_prog_gv11b.h"
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
/*
 * Program the zcull buffer pointer (addr >> 8). Unlike gm20b, gv11b also
 * writes the high 32 bits.
 */
void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_o(),
		u64_lo32(addr));
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_zcull_ptr_hi_o(),
		u64_hi32(addr));
}
/* Program the perfmon buffer pointer (addr >> 8), low and high words. */
void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
		u64_lo32(addr));
	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_hi_o(),
		u64_hi32(addr));
}
/* HW field value for PM mode "stream out ctxsw" (gv11b only). */
u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void)
{
	return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
}
/*
 * Set PM mode to stream-out ctxsw via the gm20b RMW helper; returns the
 * mode value that was programmed.
 */
u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	u32 pm_mode = ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();

	gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, pm_mode);
	return pm_mode;
}
/*
 * Program the full-preemption buffer pointer (addr >> 8); gv11b writes
 * both low and high words.
 */
void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_o(),
		u64_lo32(addr));
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_hi_o(),
		u64_hi32(addr));
}
/* Program the VEID0 full-preemption buffer pointer (addr >> 8), lo and hi. */
void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	addr = addr >> 8;
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_veid0_o(),
		u64_lo32(addr));
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
		u64_hi32(addr));
}
/* Stride between SM DSM perf-counter registers in the extended buffer. */
u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void)
{
	return ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
}
/*
 * Program the context buffer pointer. Note: the high word is written
 * before the low word here (reverse of the other lo/hi pairs).
 */
void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_buffer_ptr_hi_o(),
		u64_hi32(addr));
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_context_buffer_ptr_o(),
		u64_lo32(addr));
}
/*
 * Mark the context image as using per-VEID headers (subcontexts).
 * NOTE(review): writes the _v() value directly rather than an _f() field
 * macro — confirm this matches the ctl word's field layout.
 */
void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g,
	struct nvgpu_mem *ctx_mem)
{
	nvgpu_mem_wr(g, ctx_mem,
		ctxsw_prog_main_image_ctl_o(),
		ctxsw_prog_main_image_ctl_type_per_veid_header_v());
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_CTXSW_PROG_GV11B_H
#define NVGPU_CTXSW_PROG_GV11B_H

/*
 * gv11b additions to the gr.ctxsw_prog HAL: 64-bit (lo+hi) pointer
 * programming, stream-out PM mode and per-VEID (subcontext) header setup.
 */

#include <nvgpu/types.h>

void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr);
void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
	u64 addr);
u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void);
u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);
void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
u32 gv11b_ctxsw_prog_hw_get_perf_counter_register_stride(void);
void gv11b_ctxsw_prog_set_context_buffer_ptr(struct gk20a *g,
	struct nvgpu_mem *ctx_mem, u64 addr);
void gv11b_ctxsw_prog_set_type_per_veid_header(struct gk20a *g,
	struct nvgpu_mem *ctx_mem);

#endif /* NVGPU_CTXSW_PROG_GV11B_H */

View File

@@ -43,7 +43,6 @@
#include <nvgpu/log.h>
#include <nvgpu/fecs_trace.h>
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
struct gk20a_fecs_trace_hash_ent {
@@ -62,29 +61,14 @@ struct gk20a_fecs_trace {
};
#ifdef CONFIG_GK20A_CTXSW_TRACE
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
{
return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
}
u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}
u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}
static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
{
return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
}
int gk20a_fecs_trace_num_ts(void)
int gk20a_fecs_trace_num_ts(struct gk20a *g)
{
return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()
- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}
@@ -94,18 +78,18 @@ struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
return (struct gk20a_fecs_trace_record *)
((u8 *) mem->cpu_va
+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
((u8 *) mem->cpu_va +
(idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()));
}
bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
bool gk20a_fecs_trace_is_valid_record(struct gk20a *g,
struct gk20a_fecs_trace_record *r)
{
/*
* testing magic_hi should suffice. magic_lo is sometimes used
* as a sequence number in experimental ucode.
*/
return (r->magic_hi
== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
}
int gk20a_fecs_trace_get_read_index(struct gk20a *g)
@@ -254,7 +238,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"consuming record trace=%p read=%d record=%p", trace, index, r);
if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
if (unlikely(!gk20a_fecs_trace_is_valid_record(g, r))) {
nvgpu_warn(g,
"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
trace, index, r, r->magic_lo, r->magic_hi);
@@ -278,10 +262,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
entry.vmid = vmid;
/* break out FECS record into trace events */
for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) {
entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
entry.timestamp =
g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
nvgpu_log(g, gpu_dbg_ctxsw,
@@ -402,7 +387,7 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
return GK20A_FECS_TRACE_NUM_RECORDS
* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
}
int gk20a_fecs_trace_init(struct gk20a *g)
@@ -449,8 +434,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
* in the context header.
*/
u32 lo;
u32 hi;
u64 addr;
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem;
@@ -475,37 +458,24 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
} else {
addr = nvgpu_inst_block_addr(g, mem);
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
aperture_mask = nvgpu_aperture_mask(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
aperture_mask =
g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem);
}
if (!addr)
return -ENOMEM;
lo = u64_lo32(addr);
hi = u64_hi32(addr);
mem = &gr_ctx->mem;
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
lo, GK20A_FECS_TRACE_NUM_RECORDS);
nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
GK20A_FECS_TRACE_NUM_RECORDS);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem,
GK20A_FECS_TRACE_NUM_RECORDS);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
mem = &ch->ctx_header;
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
aperture_mask);
g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask);
/* pid (process identifier) in user space, corresponds to tgid (thread
* group id) in kernel space.
@@ -573,7 +543,7 @@ int gk20a_gr_max_entries(struct gk20a *g,
int tag;
/* Compute number of entries per record, with given filter */
for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(g); tag++)
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
/* Return max number of entries generated for the whole ring */

View File

@@ -53,7 +53,6 @@
#include "gr_pri_gk20a.h"
#include "regops_gk20a.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
@@ -93,7 +92,7 @@ u32 gr_gk20a_get_ctx_id(struct gk20a *g, struct nvgpu_mem *ctx_mem)
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
ctx_id = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o());
ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g, ctx_mem);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", ctx_id);
return ctx_id;
}
@@ -619,9 +618,8 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
{
if (update_patch_count) {
/* reset patch count if ucode has already processed it */
gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
&gr_ctx->mem,
ctxsw_prog_main_image_patch_count_o());
gr_ctx->patch_ctx.data_count =
g->ops.gr.ctxsw_prog.get_patch_count(g, &gr_ctx->mem);
nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
gr_ctx->patch_ctx.data_count);
}
@@ -634,8 +632,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
{
/* Write context count to context image if it is mapped */
if (update_patch_count) {
nvgpu_mem_wr(g, &gr_ctx->mem,
ctxsw_prog_main_image_patch_count_o(),
g->ops.gr.ctxsw_prog.set_patch_count(g, &gr_ctx->mem,
gr_ctx->patch_ctx.data_count);
nvgpu_log(g, gpu_dbg_info, "write patch count %d",
gr_ctx->patch_ctx.data_count);
@@ -710,24 +707,6 @@ int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
return ret;
}
void gr_gk20a_write_zcull_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 va = u64_lo32(gpu_va >> 8);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_zcull_ptr_o(), va);
}
void gr_gk20a_write_pm_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 va = u64_lo32(gpu_va >> 8);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_pm_ptr_o(), va);
}
static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
@@ -740,8 +719,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
mem = &gr_ctx->mem;
if (gr_ctx->zcull_ctx.gpu_va == 0ULL &&
gr_ctx->zcull_ctx.ctx_sw_mode ==
ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
g->ops.gr.ctxsw_prog.is_zcull_mode_separate_buffer(
gr_ctx->zcull_ctx.ctx_sw_mode)) {
return -EINVAL;
}
@@ -757,15 +736,14 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
return ret;
}
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_zcull_o(),
gr_ctx->zcull_ctx.ctx_sw_mode);
g->ops.gr.ctxsw_prog.set_zcull(g, mem, gr_ctx->zcull_ctx.ctx_sw_mode);
if (ctxheader->gpu_va != 0ULL) {
g->ops.gr.write_zcull_ptr(g, ctxheader,
gr_ctx->zcull_ctx.gpu_va);
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
gr_ctx->zcull_ctx.gpu_va);
} else {
g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, mem,
gr_ctx->zcull_ctx.gpu_va);
}
gk20a_enable_channel_tsg(g, c);
@@ -1302,7 +1280,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx)
{
struct gr_gk20a *gr = &g->gr;
u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
u32 ctx_header_bytes = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
u32 ctx_header_words;
u32 i;
u32 data;
@@ -1497,10 +1475,9 @@ restore_fe_go_idle:
data = nvgpu_mem_rd32(g, gr_mem, i);
nvgpu_mem_wr32(g, gold_mem, i, data);
}
nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, gold_mem);
g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, gold_mem, 0);
err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
if (err != 0) {
@@ -1554,7 +1531,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx = NULL;
struct nvgpu_mem *mem = NULL;
u32 data;
int ret;
nvgpu_log_fn(g, " ");
@@ -1587,16 +1563,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
Flush and invalidate before cpu update. */
g->ops.mm.l2_flush(g, true);
data = nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_pm_o());
data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
data |= enable_smpc_ctxsw ?
ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_pm_o(), data);
g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, mem, enable_smpc_ctxsw);
out:
gk20a_enable_channel_tsg(g, c);
@@ -1612,7 +1579,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
struct nvgpu_mem *gr_mem = NULL;
struct nvgpu_gr_ctx *gr_ctx;
struct pm_ctx_desc *pm_ctx;
u32 data;
u64 virt_addr = 0;
struct nvgpu_mem *ctxheader = &c->ctx_header;
int ret;
@@ -1633,24 +1599,29 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
}
if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
(g->ops.gr.get_hw_accessor_stream_out_mode == NULL)) {
nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
(g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw ==
NULL)) {
nvgpu_err(g,
"Mode-E hwpm context switch mode is not supported");
return -EINVAL;
}
switch (mode) {
case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
return 0;
}
break;
case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
return 0;
}
break;
case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
return 0;
}
break;
@@ -1711,37 +1682,34 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
}
}
data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
data = data & ~ctxsw_prog_main_image_pm_mode_m();
switch (mode) {
case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.set_pm_mode_ctxsw(g, gr_mem);
virt_addr = pm_ctx->mem.gpu_va;
break;
case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.set_pm_mode_stream_out_ctxsw(g, gr_mem);
virt_addr = pm_ctx->mem.gpu_va;
break;
case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.set_pm_mode_no_ctxsw(g, gr_mem);
virt_addr = 0;
}
data |= pm_ctx->pm_mode;
nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_pm_o(), data);
if (ctxheader->gpu_va != 0ULL) {
struct channel_gk20a *ch;
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
g->ops.gr.write_pm_ptr(g, &ch->ctx_header, virt_addr);
g->ops.gr.ctxsw_prog.set_pm_ptr(g, &ch->ctx_header,
virt_addr);
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
} else {
g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
g->ops.gr.ctxsw_prog.set_pm_ptr(g, gr_mem, virt_addr);
}
/* enable channel */
@@ -1750,26 +1718,13 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
return 0;
}
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
struct nvgpu_mem *mem)
{
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_save_ops_o(), 0);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_restore_ops_o(), 0);
}
/* load saved fresh copy of gloden image into channel gr_ctx */
int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
struct channel_gk20a *c,
struct nvgpu_gr_ctx *gr_ctx)
{
struct gr_gk20a *gr = &g->gr;
u32 virt_addr_lo;
u32 virt_addr_hi;
u64 virt_addr = 0;
u32 v, data;
int ret = 0;
struct nvgpu_mem *mem;
nvgpu_log_fn(g, " ");
@@ -1787,8 +1742,8 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
gr->ctx_vars.local_golden_image,
gr->ctx_vars.golden_image_size);
if (g->ops.gr.init_ctxsw_hdr_data != NULL) {
g->ops.gr.init_ctxsw_hdr_data(g, mem);
if (g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data != NULL) {
g->ops.gr.ctxsw_prog.init_ctxsw_hdr_data(g, mem);
}
if ((g->ops.gr.enable_cde_in_fecs != NULL) && c->cde) {
@@ -1796,32 +1751,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
}
/* set priv access map */
virt_addr_lo =
u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
virt_addr_hi =
u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
if (g->allow_all) {
data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
} else {
data = ctxsw_prog_main_image_priv_access_map_config_mode_use_map_f();
}
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
data);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
virt_addr_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
virt_addr_hi);
g->ops.gr.ctxsw_prog.set_priv_access_map_config_mode(g, mem,
g->allow_all);
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, mem,
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
/* disable verif features */
v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
v = v | ctxsw_prog_main_image_misc_options_verif_features_disabled_f();
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
g->ops.gr.ctxsw_prog.disable_verif_features(g, mem);
if (g->ops.gr.update_ctxsw_preemption_mode != NULL) {
g->ops.gr.update_ctxsw_preemption_mode(g, gr_ctx, &c->ctx_header);
@@ -1831,26 +1767,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
}
virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
gr_ctx->patch_ctx.data_count);
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
gr_ctx->patch_ctx.data_count);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_adr_lo_o(),
virt_addr_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_adr_hi_o(),
virt_addr_hi);
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
gr_ctx->patch_ctx.data_count);
g->ops.gr.ctxsw_prog.set_patch_addr(g, mem,
gr_ctx->patch_ctx.mem.gpu_va);
/* Update main header region of the context buffer with the info needed
* for PM context switching, including mode and possibly a pointer to
* the PM backing store.
*/
if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
if (gr_ctx->pm_ctx.pm_mode !=
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
if (gr_ctx->pm_ctx.mem.gpu_va == 0ULL) {
nvgpu_err(g,
"context switched pm with no pm buffer!");
@@ -1862,15 +1791,10 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
virt_addr = 0;
}
data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
data = data & ~ctxsw_prog_main_image_pm_mode_m();
data |= gr_ctx->pm_ctx.pm_mode;
g->ops.gr.ctxsw_prog.set_pm_mode(g, mem, gr_ctx->pm_ctx.pm_mode);
g->ops.gr.ctxsw_prog.set_pm_ptr(g, mem, virt_addr);
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
g->ops.gr.write_pm_ptr(g, mem, virt_addr);
return ret;
return 0;
}
static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
@@ -2959,7 +2883,8 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
}
/* PM ctxt switch is off by default */
gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
gr_ctx->pm_ctx.pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
} else {
/* commit gr ctx buffer */
err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
@@ -6654,8 +6579,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
u32 num_tpc;
u32 tpc, gpc, reg;
u32 chk_addr;
u32 vaddr_lo;
u32 vaddr_hi;
u32 tmp;
u32 num_ovr_perf_regs = 0;
u32 *ovr_perf_regs = NULL;
@@ -6682,8 +6605,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
/* reset the patch count from previous
runs,if ucode has already processed
it */
tmp = nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_patch_count_o());
tmp = g->ops.gr.ctxsw_prog.get_patch_count(g,
mem);
if (tmp == 0U) {
gr_ctx->patch_ctx.data_count = 0;
@@ -6692,26 +6615,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
gr_gk20a_ctx_patch_write(g, gr_ctx,
addr, data, true);
vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
g->ops.gr.ctxsw_prog.set_patch_count(g, mem,
gr_ctx->patch_ctx.data_count);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_count_o(),
gr_ctx->patch_ctx.data_count);
if (ctxheader->gpu_va != 0ULL) {
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_lo_o(),
vaddr_lo);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_hi_o(),
vaddr_hi);
g->ops.gr.ctxsw_prog.set_patch_addr(g,
ctxheader,
gr_ctx->patch_ctx.mem.gpu_va);
} else {
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_adr_lo_o(),
vaddr_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_patch_adr_hi_o(),
vaddr_hi);
g->ops.gr.ctxsw_prog.set_patch_addr(g,
mem,
gr_ctx->patch_ctx.mem.gpu_va);
}
/* we're not caching these on cpu side,
@@ -6726,24 +6640,6 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
#define ILLEGAL_ID ((u32)~0)
static inline bool check_main_image_header_magic(u8 *context)
{
u32 magic = *(u32 *)(context + ctxsw_prog_main_image_magic_value_o());
return magic == ctxsw_prog_main_image_magic_value_v_value_v();
}
static inline bool check_local_header_magic(u8 *context)
{
u32 magic = *(u32 *)(context + ctxsw_prog_local_magic_value_o());
return magic == ctxsw_prog_local_magic_value_v_value_v();
}
/* most likely dupe of ctxsw_gpccs_header__size_1_v() */
static inline u32 ctxsw_prog_ucode_header_size_in_bytes(void)
{
return 256U;
}
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
u32 **ovr_perf_regs)
{
@@ -6758,9 +6654,9 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
u32 context_buffer_size,
u32 *priv_offset)
{
u32 i, data32;
u32 i;
u32 gpc_num, tpc_num;
u32 num_gpcs, num_tpcs;
u32 num_gpcs;
u32 chk_addr;
u32 ext_priv_offset, ext_priv_size;
u8 *context;
@@ -6809,18 +6705,18 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
return -EINVAL;
}
buffer_segments_size = ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
buffer_segments_size = g->ops.gr.ctxsw_prog.hw_get_extended_buffer_segments_size_in_bytes();
/* note below is in words/num_registers */
marker_size = ctxsw_prog_extended_marker_size_in_bytes_v() >> 2;
marker_size = g->ops.gr.ctxsw_prog.hw_extended_marker_size_in_bytes() >> 2;
context = (u8 *)context_buffer;
/* sanity check main header */
if (!check_main_image_header_magic(context)) {
if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) {
nvgpu_err(g,
"Invalid main header: magic value");
return -EINVAL;
}
num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context);
if (gpc_num >= num_gpcs) {
nvgpu_err(g,
"GPC 0x%08x is greater than total count 0x%08x!",
@@ -6828,21 +6724,20 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
return -EINVAL;
}
data32 = *(u32 *)(context + ctxsw_prog_main_extended_buffer_ctl_o());
ext_priv_size = ctxsw_prog_main_extended_buffer_ctl_size_v(data32);
g->ops.gr.ctxsw_prog.get_extended_buffer_size_offset(context,
&ext_priv_size, &ext_priv_offset);
if (0U == ext_priv_size) {
nvgpu_log_info(g, " No extended memory in context buffer");
return -EINVAL;
}
ext_priv_offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data32);
offset_to_segment = ext_priv_offset * ctxsw_prog_ucode_header_size_in_bytes();
offset_to_segment = ext_priv_offset * 256U;
offset_to_segment_end = offset_to_segment +
(ext_priv_size * buffer_segments_size);
/* check local header magic */
context += ctxsw_prog_ucode_header_size_in_bytes();
if (!check_local_header_magic(context)) {
context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
nvgpu_err(g,
"Invalid local header: magic value");
return -EINVAL;
@@ -6937,8 +6832,6 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
offset_to_segment += (num_ext_gpccs_ext_buffer_segments *
buffer_segments_size * gpc_num);
num_tpcs = g->gr.gpc_tpc_count[gpc_num];
/* skip the head marker to start with */
inter_seg_offset = marker_size;
@@ -6949,23 +6842,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g,
(tpc_num * control_register_stride) +
sm_dsm_perf_ctrl_reg_id;
} else {
/* skip all the control registers */
inter_seg_offset = inter_seg_offset +
(num_tpcs * control_register_stride);
/* skip the marker between control and counter segments */
inter_seg_offset += marker_size;
/* skip over counter regs of TPCs before the one we want */
inter_seg_offset = inter_seg_offset +
(tpc_num * perf_register_stride) *
ctxsw_prog_extended_num_smpc_quadrants_v();
/* skip over the register for the quadrants we do not want.
* then skip to the register in this tpc */
inter_seg_offset = inter_seg_offset +
(perf_register_stride * quad) +
sm_dsm_perf_reg_id;
return -EINVAL;
}
/* set the offset to the segment offset plus the inter segment offset to
@@ -7146,7 +7023,6 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
u32 *num_ppcs, u32 *ppc_mask,
u32 *reg_ppc_count)
{
u32 data32;
u32 num_pes_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_PES_PER_GPC);
/*
@@ -7159,11 +7035,7 @@ static int gr_gk20a_determine_ppc_configuration(struct gk20a *g,
return -EINVAL;
}
data32 = *(u32 *)(context + ctxsw_prog_local_image_ppc_info_o());
*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data32);
*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data32);
g->ops.gr.ctxsw_prog.get_ppc_info(context, num_ppcs, ppc_mask);
*reg_ppc_count = g->netlist_vars->ctxsw_regs.ppc.count;
return 0;
@@ -7242,7 +7114,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
u32 context_buffer_size,
u32 *priv_offset)
{
u32 i, data32;
u32 i;
int err;
enum ctxsw_addr_type addr_type;
u32 broadcast_flags;
@@ -7267,22 +7139,23 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
}
context = (u8 *)context_buffer;
if (!check_main_image_header_magic(context)) {
if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) {
nvgpu_err(g,
"Invalid main header: magic value");
return -EINVAL;
}
num_gpcs = *(u32 *)(context + ctxsw_prog_main_image_num_gpcs_o());
num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context);
/* Parse the FECS local header. */
context += ctxsw_prog_ucode_header_size_in_bytes();
if (!check_local_header_magic(context)) {
context += g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
nvgpu_err(g,
"Invalid FECS local header: magic value");
return -EINVAL;
}
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
sys_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
sys_priv_offset =
g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "sys_priv_offset=0x%x", sys_priv_offset);
/* If found in Ext buffer, ok.
@@ -7302,8 +7175,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
(addr_type == CTXSW_ADDR_TYPE_BE)) {
/* Find the offset in the FECS segment. */
offset_to_segment = sys_priv_offset *
ctxsw_prog_ucode_header_size_in_bytes();
offset_to_segment = sys_priv_offset * 256U;
err = gr_gk20a_process_context_buffer_priv_segment(g,
addr_type, addr,
@@ -7326,15 +7198,14 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
/* Parse the GPCCS local header(s).*/
for (i = 0; i < num_gpcs; i++) {
context += ctxsw_prog_ucode_header_size_in_bytes();
if (!check_local_header_magic(context)) {
context += g->ops.gr.ctxsw_prog.hw_get_gpccs_header_size();
if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
nvgpu_err(g,
"Invalid GPCCS local header: magic value");
return -EINVAL;
}
data32 = *(u32 *)(context + ctxsw_prog_local_priv_register_ctl_o());
gpc_priv_offset = ctxsw_prog_local_priv_register_ctl_offset_v(data32);
gpc_priv_offset = g->ops.gr.ctxsw_prog.get_local_priv_register_ctl_offset(context);
err = gr_gk20a_determine_ppc_configuration(g, context,
&num_ppcs, &ppc_mask,
@@ -7345,7 +7216,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
}
num_tpcs = *(u32 *)(context + ctxsw_prog_local_image_num_tpcs_o());
num_tpcs = g->ops.gr.ctxsw_prog.get_num_tpcs(context);
if ((i == gpc_num) && ((tpc_num + 1U) > num_tpcs)) {
nvgpu_err(g,
@@ -7359,8 +7230,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
"gpc_priv_offset 0x%#08x",
gpc_priv_offset);
offset_to_segment = gpc_priv_offset *
ctxsw_prog_ucode_header_size_in_bytes();
offset_to_segment = gpc_priv_offset * 256U;
err = g->ops.gr.get_offset_in_gpccs_segment(g,
addr_type,

View File

@@ -738,12 +738,6 @@ int gr_gk20a_init_sm_id_table(struct gk20a *g);
int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va);
void gr_gk20a_write_zcull_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void gr_gk20a_write_pm_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
u32 gk20a_gr_gpc_offset(struct gk20a *g, u32 gpc);
u32 gk20a_gr_tpc_offset(struct gk20a *g, u32 tpc);
void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
@@ -751,8 +745,6 @@ void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
void gk20a_gr_init_ovr_sm_dsm_perf(void);
void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
u32 **ovr_perf_regs);
void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
struct nvgpu_mem *mem);
u32 gr_gk20a_get_patch_slots(struct gk20a *g);
int gk20a_gr_handle_notify_pending(struct gk20a *g,
struct gr_gk20a_isr_data *isr_data);

View File

@@ -39,7 +39,6 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
#include <nvgpu/hw/gm20b/hw_fifo_gm20b.h>
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
#include <nvgpu/hw/gm20b/hw_perf_gm20b.h>
void gr_gm20b_init_gpc_mmu(struct gk20a *g)
@@ -537,7 +536,7 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
*ctrl_register_stride =
ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
}
u32 gr_gm20b_get_gpc_mask(struct gk20a *g)
@@ -908,16 +907,11 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
{
u32 cta_preempt_option =
ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
nvgpu_log_fn(g, " ");
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
nvgpu_mem_wr(g, &gr_ctx->mem,
ctxsw_prog_main_image_preemption_options_o(),
cta_preempt_option);
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g,
&gr_ctx->mem);
}
nvgpu_log_fn(g, "done");
@@ -1069,7 +1063,6 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
struct nvgpu_mem *mem;
u32 v;
nvgpu_log_fn(c->g, " ");
@@ -1084,11 +1077,7 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
return -EINVAL;
}
v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);
c->g->ops.gr.ctxsw_prog.set_pc_sampling(c->g, mem, enable);
nvgpu_log_fn(c->g, "done");
@@ -1176,11 +1165,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g)
void gr_gm20b_enable_cde_in_fecs(struct gk20a *g, struct nvgpu_mem *mem)
{
u32 cde_v;
cde_v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_ctl_o());
cde_v |= ctxsw_prog_main_image_ctl_cde_enabled_f();
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_ctl_o(), cde_v);
g->ops.gr.ctxsw_prog.set_cde_enabled(g, mem);
}
void gr_gm20b_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)

View File

@@ -41,6 +41,7 @@
#include "common/ptimer/ptimer_gk20a.h"
#include "common/fb/fb_gm20b.h"
#include "common/netlist/netlist_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/therm/therm_gm20b.h"
#include "common/ltc/ltc_gm20b.h"
#include "common/fuse/fuse_gm20b.h"
@@ -288,8 +289,6 @@ static const struct gpu_ops gm20b_ops = {
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -312,7 +311,6 @@ static const struct gpu_ops gm20b_ops = {
.init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf,
.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
.init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data,
.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
.handle_notify_pending = gk20a_gr_handle_notify_pending,
.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
@@ -335,6 +333,72 @@ static const struct gpu_ops gm20b_ops = {
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.alloc_gfxp_rtv_cb = NULL,
.commit_gfxp_rtv_cb = NULL,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.init_ctxsw_hdr_data = gm20b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gm20b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
}
},
.fb = {
.init_hw = gm20b_fb_init_hw,
@@ -695,6 +759,7 @@ int gm20b_init_hal(struct gk20a *g)
gops->ltc = gm20b_ops.ltc;
gops->ce2 = gm20b_ops.ce2;
gops->gr = gm20b_ops.gr;
gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog;
gops->fb = gm20b_ops.fb;
gops->clock_gating = gm20b_ops.clock_gating;
gops->fifo = gm20b_ops.fifo;

View File

@@ -33,6 +33,8 @@
#include "common/fb/fb_gm20b.h"
#include "common/fb/fb_gp106.h"
#include "common/netlist/netlist_gp106.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/xve/xve_gp106.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp106.h"
@@ -354,8 +356,6 @@ static const struct gpu_ops gp106_ops = {
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -412,6 +412,81 @@ static const struct gpu_ops gp106_ops = {
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.alloc_gfxp_rtv_cb = NULL,
.commit_gfxp_rtv_cb = NULL,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gp10b_ctxsw_prog_set_full_preemption_ptr,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = gm20b_fb_init_hw,
@@ -848,6 +923,7 @@ int gp106_init_hal(struct gk20a *g)
gops->ltc = gp106_ops.ltc;
gops->ce2 = gp106_ops.ce2;
gops->gr = gp106_ops.gr;
gops->gr.ctxsw_prog = gp106_ops.gr.ctxsw_prog;
gops->fb = gp106_ops.fb;
gops->clock_gating = gp106_ops.clock_gating;
gops->fifo = gp106_ops.fifo;

View File

@@ -28,7 +28,6 @@
#include "fecs_trace_gp10b.h"
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
#ifdef CONFIG_GK20A_CTXSW_TRACE

View File

@@ -44,7 +44,6 @@
#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
#include <nvgpu/hw/gp10b/hw_fifo_gp10b.h>
#include <nvgpu/hw/gp10b/hw_ctxsw_prog_gp10b.h>
#define GFXP_WFI_TIMEOUT_COUNT_DEFAULT 100000U
@@ -1134,83 +1133,27 @@ fail_free_gk20a_ctx:
void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
struct nvgpu_gr_ctx *gr_ctx)
{
struct nvgpu_mem *mem = &gr_ctx->mem;
nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_magic_value_o()),
ctxsw_prog_main_image_magic_value_v_value_v());
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
nvgpu_err(g, "ctxsw_prog_main_image_context_timestamp_buffer_control : %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
nvgpu_err(g, "NUM_SAVE_OPERATIONS : %d",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_num_save_ops_o()));
nvgpu_err(g, "WFI_SAVE_OPERATIONS : %d",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_num_wfi_save_ops_o()));
nvgpu_err(g, "CTA_SAVE_OPERATIONS : %d",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_num_cta_save_ops_o()));
nvgpu_err(g, "GFXP_SAVE_OPERATIONS : %d",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_num_gfxp_save_ops_o()));
nvgpu_err(g, "CILP_SAVE_OPERATIONS : %d",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_num_cilp_save_ops_o()));
nvgpu_err(g,
"image gfx preemption option (GFXP is 1) %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_graphics_preemption_options_o()));
nvgpu_err(g,
"image compute preemption option (CTA is 1) %x",
nvgpu_mem_rd(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o()));
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g, &gr_ctx->mem);
}
void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
{
struct nvgpu_mem *mem = &gr_ctx->mem;
u32 gfxp_preempt_option =
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
u32 cilp_preempt_option =
ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
u32 cta_preempt_option =
ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
int err;
nvgpu_log_fn(g, " ");
if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_graphics_preemption_options_o(),
gfxp_preempt_option);
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
}
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
nvgpu_log_info(g, "CILP: %x", cilp_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o(),
cilp_preempt_option);
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
}
if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o(),
cta_preempt_option);
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
}
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
@@ -2226,12 +2169,10 @@ enable_ch:
}
void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
struct nvgpu_gr_ctx *gr_ctx) {
u32 v;
v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
struct nvgpu_gr_ctx *gr_ctx)
{
g->ops.gr.ctxsw_prog.set_pmu_options_boost_clock_frequencies(g, mem,
gr_ctx->boosted_ctx);
nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pmu_options_o(), v);
}
int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
@@ -2357,11 +2298,7 @@ int gr_gp10b_init_preemption_state(struct gk20a *g)
void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 va = u64_lo32(gpu_va >> 8);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_full_preemption_ptr_o(), va);
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va);
}
void gr_gp10b_init_czf_bypass(struct gk20a *g)
@@ -2386,20 +2323,6 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
}
void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem)
{
gk20a_gr_init_ctxsw_hdr_data(g, mem);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);
}
void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g)
{
struct gr_gk20a *gr = &g->gr;

View File

@@ -147,7 +147,6 @@ void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
void gr_gp10b_init_czf_bypass(struct gk20a *g);
void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem);
void gr_gp10b_init_gfxp_wfi_timeout_count(struct gk20a *g);
unsigned long gr_gp10b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
bool gr_gp10b_suspend_context(struct channel_gk20a *ch,

View File

@@ -45,6 +45,8 @@
#include "common/fb/fb_gm20b.h"
#include "common/fb/fb_gp10b.h"
#include "common/netlist/netlist_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h"
#include "common/ltc/ltc_gm20b.h"
@@ -310,8 +312,6 @@ static const struct gpu_ops gp10b_ops = {
.program_zcull_mapping = gr_gk20a_program_zcull_mapping,
.commit_global_timeslice = gr_gk20a_commit_global_timeslice,
.commit_inst = gr_gk20a_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.load_tpc_mask = gr_gm20b_load_tpc_mask,
.trigger_suspend = gr_gk20a_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -345,7 +345,6 @@ static const struct gpu_ops gp10b_ops = {
.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
.init_ecc = gp10b_ecc_init,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
.init_gfxp_wfi_timeout_count =
gr_gp10b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =
@@ -373,6 +372,81 @@ static const struct gpu_ops gp10b_ops = {
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.alloc_gfxp_rtv_cb = NULL,
.commit_gfxp_rtv_cb = NULL,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gp10b_ctxsw_prog_set_full_preemption_ptr,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = gm20b_fb_init_hw,
@@ -763,6 +837,7 @@ int gp10b_init_hal(struct gk20a *g)
gops->ltc = gp10b_ops.ltc;
gops->ce2 = gp10b_ops.ce2;
gops->gr = gp10b_ops.gr;
gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog;
gops->fb = gp10b_ops.fb;
gops->clock_gating = gp10b_ops.clock_gating;
gops->fifo = gp10b_ops.fifo;

View File

@@ -37,7 +37,6 @@
#include <nvgpu/hw/gv100/hw_gr_gv100.h>
#include <nvgpu/hw/gv100/hw_proj_gv100.h>
#include <nvgpu/hw/gv100/hw_top_gv100.h>
#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
#include <nvgpu/hw/gv100/hw_perf_gv100.h>
@@ -429,11 +428,6 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
}
}
u32 gr_gv100_get_hw_accessor_stream_out_mode(void)
{
return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
}
void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
u32 num_chiplets, u32 num_perfmons)
{

View File

@@ -46,7 +46,6 @@ int gr_gv100_add_ctxsw_reg_perf_pma(struct ctxsw_buf_offset_map_entry *map,
void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
u32 num_fbpas,
u32 *priv_addr_table, u32 *t);
u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
void gr_gv100_init_hwpm_pmm_register(struct gk20a *g);
void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val,
u32 num_chiplets, u32 num_perfmons);

View File

@@ -36,6 +36,9 @@
#include "common/fb/fb_gv100.h"
#include "common/xve/xve_gp106.h"
#include "common/netlist/netlist_gv100.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp106.h"
#include "common/therm/therm_gp10b.h"
@@ -398,8 +401,6 @@ static const struct gpu_ops gv100_ops = {
.enable_exceptions = gr_gv11b_enable_exceptions,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_hw_accessor_stream_out_mode =
gr_gv100_get_hw_accessor_stream_out_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
.set_pmm_register = gr_gv100_set_pmm_register,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
@@ -416,8 +417,6 @@ static const struct gpu_ops gv100_ops = {
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
.commit_inst = gr_gv11b_commit_inst,
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
.write_pm_ptr = gr_gv11b_write_pm_ptr,
.load_tpc_mask = gr_gv11b_load_tpc_mask,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -493,6 +492,93 @@ static const struct gpu_ops gv100_ops = {
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.alloc_gfxp_rtv_cb = NULL,
.commit_gfxp_rtv_cb = NULL,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.hw_get_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
.set_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gv11b_ctxsw_prog_set_full_preemption_ptr,
.set_full_preemption_ptr_veid0 =
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
.hw_get_perf_counter_register_stride =
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
.set_context_buffer_ptr =
gv11b_ctxsw_prog_set_context_buffer_ptr,
.set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = gv11b_fb_init_hw,
@@ -1001,6 +1087,7 @@ int gv100_init_hal(struct gk20a *g)
gops->ltc = gv100_ops.ltc;
gops->ce2 = gv100_ops.ce2;
gops->gr = gv100_ops.gr;
gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog;
gops->fb = gv100_ops.fb;
gops->clock_gating = gv100_ops.clock_gating;
gops->fifo = gv100_ops.fifo;

View File

@@ -56,7 +56,6 @@
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
#include <nvgpu/hw/gv11b/hw_fifo_gv11b.h>
#include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
@@ -1663,38 +1662,23 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *ctxheader)
{
struct nvgpu_mem *mem = &gr_ctx->mem;
u32 gfxp_preempt_option =
ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
u32 cilp_preempt_option =
ctxsw_prog_main_image_compute_preemption_options_control_cilp_f();
u32 cta_preempt_option =
ctxsw_prog_main_image_compute_preemption_options_control_cta_f();
int err;
nvgpu_log_fn(g, " ");
if (gr_ctx->graphics_preempt_mode ==
NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
nvgpu_log_info(g, "GfxP: %x", gfxp_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_graphics_preemption_options_o(),
gfxp_preempt_option);
g->ops.gr.ctxsw_prog.set_graphics_preemption_mode_gfxp(g, mem);
}
if (gr_ctx->compute_preempt_mode ==
NVGPU_PREEMPTION_MODE_COMPUTE_CILP) {
nvgpu_log_info(g, "CILP: %x", cilp_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o(),
cilp_preempt_option);
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cilp(g, mem);
}
if (gr_ctx->compute_preempt_mode ==
NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
nvgpu_log_info(g, "CTA: %x", cta_preempt_option);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_compute_preemption_options_o(),
cta_preempt_option);
g->ops.gr.ctxsw_prog.set_compute_preemption_mode_cta(g, mem);
}
if (gr_ctx->preempt_ctxsw_buffer.gpu_va != 0ULL) {
@@ -2947,35 +2931,6 @@ int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
return 0;
}
void gr_gv11b_write_zcull_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 va_lo, va_hi;
gpu_va = gpu_va >> 8;
va_lo = u64_lo32(gpu_va);
va_hi = u64_hi32(gpu_va);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_zcull_ptr_o(), va_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_zcull_ptr_hi_o(), va_hi);
}
void gr_gv11b_write_pm_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 va_lo, va_hi;
gpu_va = gpu_va >> 8;
va_lo = u64_lo32(gpu_va);
va_hi = u64_hi32(gpu_va);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_pm_ptr_o(), va_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_pm_ptr_hi_o(), va_hi);
}
void gr_gv11b_load_tpc_mask(struct gk20a *g)
{
u32 pes_tpc_mask = 0, fuse_tpc_mask;
@@ -3009,25 +2964,9 @@ void gr_gv11b_load_tpc_mask(struct gk20a *g)
void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va)
{
u32 addr_lo, addr_hi;
/* gpu va still needs to be 8 bit aligned */
gpu_va = gpu_va >> 8;
addr_lo = u64_lo32(gpu_va);
addr_hi = u64_hi32(gpu_va);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_full_preemption_ptr_o(), addr_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_full_preemption_ptr_hi_o(), addr_hi);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_full_preemption_ptr_veid0_o(), addr_lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_full_preemption_ptr_veid0_hi_o(),
addr_hi);
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, mem, gpu_va);
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g, mem, gpu_va);
}
int gr_gv11b_init_fs_state(struct gk20a *g)
@@ -3936,7 +3875,7 @@ void gv11b_gr_get_sm_dsm_perf_regs(struct gk20a *g,
*num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs;
*sm_dsm_perf_regs = _sm_dsm_perf_regs;
*perf_register_stride =
ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v();
g->ops.gr.ctxsw_prog.hw_get_perf_counter_register_stride();
}
void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
@@ -3947,7 +3886,7 @@ void gv11b_gr_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
*num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs;
*sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs;
*ctrl_register_stride =
ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
g->ops.gr.ctxsw_prog.hw_get_perf_counter_control_register_stride();
}
void gv11b_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,

View File

@@ -152,10 +152,6 @@ void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
int gr_gv11b_load_smid_config(struct gk20a *g);
int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va);
int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c);
void gr_gv11b_write_zcull_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void gr_gv11b_write_pm_ptr(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void gr_gv11b_load_tpc_mask(struct gk20a *g);
void gr_gv11b_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);

View File

@@ -35,6 +35,9 @@
#include "common/fb/fb_gp10b.h"
#include "common/fb/fb_gv11b.h"
#include "common/netlist/netlist_gv11b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h"
#include "common/therm/therm_gv11b.h"
@@ -350,8 +353,6 @@ static const struct gpu_ops gv11b_ops = {
.enable_exceptions = gr_gv11b_enable_exceptions,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_hw_accessor_stream_out_mode =
gr_gv100_get_hw_accessor_stream_out_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
.set_pmm_register = gr_gv100_set_pmm_register,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
@@ -368,8 +369,6 @@ static const struct gpu_ops gv11b_ops = {
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
.commit_inst = gr_gv11b_commit_inst,
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
.write_pm_ptr = gr_gv11b_write_pm_ptr,
.load_tpc_mask = gr_gv11b_load_tpc_mask,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -424,7 +423,6 @@ static const struct gpu_ops gv11b_ops = {
.handle_tpc_sm_ecc_exception =
gr_gv11b_handle_tpc_sm_ecc_exception,
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
.init_gfxp_wfi_timeout_count =
gr_gv11b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =
@@ -455,6 +453,93 @@ static const struct gpu_ops gv11b_ops = {
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.alloc_gfxp_rtv_cb = NULL,
.commit_gfxp_rtv_cb = NULL,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.hw_get_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
.set_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gv11b_ctxsw_prog_set_full_preemption_ptr,
.set_full_preemption_ptr_veid0 =
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
.hw_get_perf_counter_register_stride =
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
.set_context_buffer_ptr =
gv11b_ctxsw_prog_set_context_buffer_ptr,
.set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = gv11b_fb_init_hw,
@@ -889,6 +974,7 @@ int gv11b_init_hal(struct gk20a *g)
gops->ltc = gv11b_ops.ltc;
gops->ce2 = gv11b_ops.ce2;
gops->gr = gv11b_ops.gr;
gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog;
gops->fb = gv11b_ops.fb;
gops->clock_gating = gv11b_ops.clock_gating;
gops->fifo = gv11b_ops.fifo;

View File

@@ -31,7 +31,6 @@
#include <nvgpu/channel.h>
#include <nvgpu/hw/gv11b/hw_ram_gv11b.h>
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
#include <nvgpu/hw/gv11b/hw_gr_gv11b.h>
#include "gv11b/subctx_gv11b.h"
@@ -65,7 +64,8 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
if (!nvgpu_mem_is_valid(ctxheader)) {
ret = nvgpu_dma_alloc_sys(g, ctxsw_prog_fecs_header_v(),
ret = nvgpu_dma_alloc_sys(g,
g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
ctxheader);
if (ret != 0) {
nvgpu_err(g, "failed to allocate sub ctx header");
@@ -100,7 +100,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
struct nvgpu_mem *ctxheader = &c->ctx_header;
struct gk20a *g = c->g;
int ret = 0;
u32 addr_lo, addr_hi;
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *gr_ctx;
@@ -114,38 +113,20 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
g->ops.mm.l2_flush(g, true);
/* set priv access map */
addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
addr_lo);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
addr_hi);
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_lo_o(),
addr_lo);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_patch_adr_hi_o(),
addr_hi);
g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
gr_ctx->patch_ctx.mem.gpu_va);
g->ops.gr.write_pm_ptr(g, ctxheader, gr_ctx->pm_ctx.mem.gpu_va);
g->ops.gr.write_zcull_ptr(g, ctxheader, gr_ctx->zcull_ctx.gpu_va);
g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
gr_ctx->pm_ctx.mem.gpu_va);
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
gr_ctx->zcull_ctx.gpu_va);
addr_lo = u64_lo32(gpu_va);
addr_hi = u64_hi32(gpu_va);
g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_context_buffer_ptr_o(), addr_lo);
nvgpu_mem_wr(g, ctxheader,
ctxsw_prog_main_image_ctl_o(),
ctxsw_prog_main_image_ctl_type_per_veid_header_v());
g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
return ret;
}

View File

@@ -45,13 +45,11 @@ struct gk20a_fecs_trace_record {
};
#ifdef CONFIG_GK20A_CTXSW_TRACE
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void);
u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts);
u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts);
int gk20a_fecs_trace_num_ts(void);
int gk20a_fecs_trace_num_ts(struct gk20a *g);
struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(struct gk20a *g,
int idx);
bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r);
bool gk20a_fecs_trace_is_valid_record(struct gk20a *g,
struct gk20a_fecs_trace_record *r);
int gk20a_fecs_trace_get_read_index(struct gk20a *g);
int gk20a_fecs_trace_get_write_index(struct gk20a *g);

View File

@@ -342,7 +342,6 @@ struct gpu_ops {
int (*update_smpc_ctxsw_mode)(struct gk20a *g,
struct channel_gk20a *c,
bool enable);
u32 (*get_hw_accessor_stream_out_mode)(void);
int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
struct channel_gk20a *c,
u64 gpu_va,
@@ -454,10 +453,6 @@ struct gpu_ops {
int (*commit_global_timeslice)(struct gk20a *g,
struct channel_gk20a *c);
int (*commit_inst)(struct channel_gk20a *c, u64 gpu_va);
void (*write_zcull_ptr)(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void (*write_pm_ptr)(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void (*set_preemption_buffer_va)(struct gk20a *g,
struct nvgpu_mem *mem, u64 gpu_va);
void (*load_tpc_mask)(struct gk20a *g);
@@ -479,8 +474,6 @@ struct gpu_ops {
u32 gpc, u32 tpc, u32 sm);
void (*resume_all_sms)(struct gk20a *g);
void (*disable_rd_coalesce)(struct gk20a *g);
void (*init_ctxsw_hdr_data)(struct gk20a *g,
struct nvgpu_mem *mem);
void (*init_gfxp_wfi_timeout_count)(struct gk20a *g);
unsigned long (*get_max_gfxp_wfi_timeout_count)
(struct gk20a *g);
@@ -539,6 +532,96 @@ struct gpu_ops {
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
void (*commit_gfxp_rtv_cb)(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, bool patch);
struct {
u32 (*hw_get_fecs_header_size)(void);
u32 (*hw_get_gpccs_header_size)(void);
u32 (*hw_get_extended_buffer_segments_size_in_bytes)(void);
u32 (*hw_extended_marker_size_in_bytes)(void);
u32 (*hw_get_perf_counter_control_register_stride)(void);
u32 (*hw_get_perf_counter_register_stride)(void);
u32 (*get_main_image_ctx_id)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
u32 (*get_patch_count)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_patch_count)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u32 count);
void (*set_patch_addr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_zcull_ptr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_zcull)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u32 mode);
void (*set_zcull_mode_no_ctxsw)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
bool (*is_zcull_mode_separate_buffer)(u32 mode);
void (*set_pm_ptr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_pm_mode)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u32 mode);
void (*set_pm_smpc_mode)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, bool enable);
u32 (*set_pm_mode_no_ctxsw)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
u32 (*set_pm_mode_ctxsw)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
u32 (*set_pm_mode_stream_out_ctxsw)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
u32 (*hw_get_pm_mode_no_ctxsw)(void);
u32 (*hw_get_pm_mode_ctxsw)(void);
u32 (*hw_get_pm_mode_stream_out_ctxsw)(void);
void (*init_ctxsw_hdr_data)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_compute_preemption_mode_cta)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_compute_preemption_mode_cilp)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_graphics_preemption_mode_gfxp)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_cde_enabled)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_pc_sampling)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, bool enable);
void (*set_priv_access_map_config_mode)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, bool allow_all);
void (*set_priv_access_map_addr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*disable_verif_features)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
bool (*check_main_image_header_magic)(u8 *context);
bool (*check_local_header_magic)(u8 *context);
u32 (*get_num_gpcs)(u8 *context);
u32 (*get_num_tpcs)(u8 *context);
void (*get_extended_buffer_size_offset)(u8 *context,
u32 *size, u32 *offset);
void (*get_ppc_info)(u8 *context,
u32 *num_ppcs, u32 *ppc_mask);
u32 (*get_local_priv_register_ctl_offset)(u8 *context);
u32 (*hw_get_ts_tag_invalid_timestamp)(void);
u32 (*hw_get_ts_tag)(u64 ts);
u64 (*hw_record_ts_timestamp)(u64 ts);
u32 (*hw_get_ts_record_size_in_bytes)(void);
u32 (*is_ts_valid_record)(u32 magic_hi);
u32 (*get_ts_buffer_aperture_mask)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*set_ts_num_records)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u32 num);
void (*set_ts_buffer_ptr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr,
u32 aperture_mask);
void (*set_pmu_options_boost_clock_frequencies)(
struct gk20a *g,
struct nvgpu_mem *ctx_mem, u32 boosted_ctx);
void (*set_context_buffer_ptr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_full_preemption_ptr)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_full_preemption_ptr_veid0)(struct gk20a *g,
struct nvgpu_mem *ctx_mem, u64 addr);
void (*set_type_per_veid_header)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
void (*dump_ctxsw_stats)(struct gk20a *g,
struct nvgpu_mem *ctx_mem);
} ctxsw_prog;
} gr;
struct {
void (*init_hw)(struct gk20a *g);

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -118,6 +122,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -102,6 +106,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -106,6 +110,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -62,6 +62,10 @@ static inline u32 ctxsw_prog_fecs_header_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_gpccs_header_stride_v(void)
{
return 0x00000100U;
}
static inline u32 ctxsw_prog_main_image_num_gpcs_o(void)
{
return 0x00000008U;
@@ -142,6 +146,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_m(void)
{
return U32(0x7U) << 0U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_ctxsw_f(void)
{
return 0x1U;
}
static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
{
return 0x0U;

View File

@@ -36,7 +36,6 @@
#include "os_linux.h"
#include "ctxsw_trace.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)

View File

@@ -55,23 +55,24 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
struct gk20a_fecs_trace_record *r =
gk20a_fecs_trace_get_record(g, *pos);
int i;
const u32 invalid_tag = gk20a_fecs_trace_record_ts_tag_invalid_ts_v();
const u32 invalid_tag =
g->ops.gr.ctxsw_prog.hw_get_ts_tag_invalid_timestamp();
u32 tag;
u64 timestamp;
seq_printf(s, "record #%lld (%p)\n", *pos, r);
seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
if (gk20a_fecs_trace_is_valid_record(r)) {
if (gk20a_fecs_trace_is_valid_record(g, r)) {
seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {
tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
for (i = 0; i < gk20a_fecs_trace_num_ts(g); i++) {
tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
if (tag == invalid_tag)
continue;
timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
timestamp = g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
}

View File

@@ -31,7 +31,6 @@
#include "os_linux.h"
#include "ioctl_tsg.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,

View File

@@ -38,6 +38,9 @@
#include "common/fb/fb_tu104.h"
#include "common/xve/xve_gp106.h"
#include "common/netlist/netlist_tu104.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h"
#include "common/therm/therm_gp106.h"
@@ -412,8 +415,6 @@ static const struct gpu_ops tu104_ops = {
.enable_exceptions = gr_gv11b_enable_exceptions,
.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
.get_hw_accessor_stream_out_mode =
gr_gv100_get_hw_accessor_stream_out_mode,
.get_num_hwpm_perfmon = gr_gv100_get_num_hwpm_perfmon,
.set_pmm_register = gr_gv100_set_pmm_register,
.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
@@ -430,8 +431,6 @@ static const struct gpu_ops tu104_ops = {
.program_zcull_mapping = gr_gv11b_program_zcull_mapping,
.commit_global_timeslice = gr_gv11b_commit_global_timeslice,
.commit_inst = gr_gv11b_commit_inst,
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
.write_pm_ptr = gr_gv11b_write_pm_ptr,
.load_tpc_mask = gr_gv11b_load_tpc_mask,
.trigger_suspend = gv11b_gr_sm_trigger_suspend,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -488,7 +487,6 @@ static const struct gpu_ops tu104_ops = {
.handle_tpc_sm_ecc_exception =
gr_gv11b_handle_tpc_sm_ecc_exception,
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
.init_gfxp_wfi_timeout_count =
gr_gv11b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =
@@ -517,6 +515,93 @@ static const struct gpu_ops tu104_ops = {
.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
.get_fecs_ctx_state_store_major_rev_id =
gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.hw_get_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
.set_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gm20b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gv11b_ctxsw_prog_set_full_preemption_ptr,
.set_full_preemption_ptr_veid0 =
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
.hw_get_perf_counter_register_stride =
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
.set_context_buffer_ptr =
gv11b_ctxsw_prog_set_context_buffer_ptr,
.set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = gv11b_fb_init_hw,
@@ -1027,6 +1112,7 @@ int tu104_init_hal(struct gk20a *g)
gops->ltc = tu104_ops.ltc;
gops->ce2 = tu104_ops.ce2;
gops->gr = tu104_ops.gr;
gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog;
gops->fb = tu104_ops.fb;
gops->clock_gating = tu104_ops.clock_gating;
gops->fifo = tu104_ops.fifo;

View File

@@ -28,6 +28,8 @@
#include "common/fb/fb_gm20b.h"
#include "common/fb/fb_gp10b.h"
#include "common/netlist/netlist_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h"
#include "common/ltc/ltc_gm20b.h"
@@ -176,8 +178,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.program_zcull_mapping = NULL,
.commit_global_timeslice = NULL,
.commit_inst = vgpu_gr_commit_inst,
.write_zcull_ptr = gr_gk20a_write_zcull_ptr,
.write_pm_ptr = gr_gk20a_write_pm_ptr,
.load_tpc_mask = NULL,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -212,7 +212,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.set_bes_crop_debug4 = NULL,
.set_ctxsw_preemption_mode =
vgpu_gr_gp10b_set_ctxsw_preemption_mode,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
.init_gfxp_wfi_timeout_count =
gr_gp10b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =
@@ -230,6 +229,81 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.get_offset_in_gpccs_segment =
gr_gk20a_get_offset_in_gpccs_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gp10b_ctxsw_prog_set_full_preemption_ptr,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = NULL,
@@ -575,6 +649,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g)
gops->ltc = vgpu_gp10b_ops.ltc;
gops->ce2 = vgpu_gp10b_ops.ce2;
gops->gr = vgpu_gp10b_ops.gr;
gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog;
gops->fb = vgpu_gp10b_ops.fb;
gops->clock_gating = vgpu_gp10b_ops.clock_gating;
gops->fifo = vgpu_gp10b_ops.fifo;

View File

@@ -39,7 +39,6 @@
#include "gk20a/fecs_trace_gk20a.h"
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
void vgpu_gr_detect_sm_arch(struct gk20a *g)
{
@@ -614,7 +613,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
}
/* PM ctxt switch is off by default */
gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
gr_ctx->pm_ctx.pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
nvgpu_log_fn(g, "done");
return 0;
@@ -1087,18 +1086,21 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
* will return an error due to using the same GPU VA twice.
*/
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) {
return 0;
}
p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) {
return 0;
}
p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
} else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
(g->ops.gr.get_hw_accessor_stream_out_mode)){
if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
if (pm_ctx->pm_mode ==
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) {
return 0;
}
p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW;
@@ -1130,11 +1132,14 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
err = err ? err : msg.ret;
if (!err) {
if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw();
} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw();
} else {
pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
pm_ctx->pm_mode =
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw();
}
}

View File

@@ -29,6 +29,9 @@
#include "common/fb/fb_gp10b.h"
#include "common/fb/fb_gv11b.h"
#include "common/netlist/netlist_gv11b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gm20b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gp10b.h"
#include "common/gr/ctxsw_prog/ctxsw_prog_gv11b.h"
#include "common/therm/therm_gm20b.h"
#include "common/therm/therm_gp10b.h"
#include "common/therm/therm_gv11b.h"
@@ -177,8 +180,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.enable_exceptions = NULL,
.get_lrf_tex_ltc_dram_override = NULL,
.update_smpc_ctxsw_mode = vgpu_gr_update_smpc_ctxsw_mode,
.get_hw_accessor_stream_out_mode =
gr_gv100_get_hw_accessor_stream_out_mode,
.update_hwpm_ctxsw_mode = vgpu_gr_update_hwpm_ctxsw_mode,
.record_sm_error_state = gv11b_gr_record_sm_error_state,
.clear_sm_error_state = vgpu_gr_clear_sm_error_state,
@@ -192,8 +193,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.program_zcull_mapping = NULL,
.commit_global_timeslice = NULL,
.commit_inst = vgpu_gr_gv11b_commit_inst,
.write_zcull_ptr = gr_gv11b_write_zcull_ptr,
.write_pm_ptr = gr_gv11b_write_pm_ptr,
.load_tpc_mask = NULL,
.trigger_suspend = NULL,
.wait_for_pause = gr_gk20a_wait_for_pause,
@@ -247,7 +246,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.handle_tpc_sm_ecc_exception =
gr_gv11b_handle_tpc_sm_ecc_exception,
.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
.init_gfxp_wfi_timeout_count =
gr_gv11b_init_gfxp_wfi_timeout_count,
.get_max_gfxp_wfi_timeout_count =
@@ -265,6 +263,93 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.get_offset_in_gpccs_segment =
gr_gk20a_get_offset_in_gpccs_segment,
.set_debug_mode = gm20b_gr_set_debug_mode,
.ctxsw_prog = {
.hw_get_fecs_header_size =
gm20b_ctxsw_prog_hw_get_fecs_header_size,
.hw_get_gpccs_header_size =
gm20b_ctxsw_prog_hw_get_gpccs_header_size,
.hw_get_extended_buffer_segments_size_in_bytes =
gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes,
.hw_extended_marker_size_in_bytes =
gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes,
.hw_get_perf_counter_control_register_stride =
gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride,
.get_main_image_ctx_id =
gm20b_ctxsw_prog_get_main_image_ctx_id,
.get_patch_count = gm20b_ctxsw_prog_get_patch_count,
.set_patch_count = gm20b_ctxsw_prog_set_patch_count,
.set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
.set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
.set_zcull = gm20b_ctxsw_prog_set_zcull,
.set_zcull_mode_no_ctxsw =
gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
.is_zcull_mode_separate_buffer =
gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
.set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
.set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
.set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
.set_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_set_pm_mode_no_ctxsw,
.set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw,
.hw_get_pm_mode_no_ctxsw =
gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw,
.hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw,
.hw_get_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw,
.set_pm_mode_stream_out_ctxsw =
gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw,
.init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data,
.set_compute_preemption_mode_cta =
gp10b_ctxsw_prog_set_compute_preemption_mode_cta,
.set_compute_preemption_mode_cilp =
gp10b_ctxsw_prog_set_compute_preemption_mode_cilp,
.set_graphics_preemption_mode_gfxp =
gp10b_ctxsw_prog_set_graphics_preemption_mode_gfxp,
.set_cde_enabled = gm20b_ctxsw_prog_set_cde_enabled,
.set_pc_sampling = gm20b_ctxsw_prog_set_pc_sampling,
.set_priv_access_map_config_mode =
gm20b_ctxsw_prog_set_priv_access_map_config_mode,
.set_priv_access_map_addr =
gm20b_ctxsw_prog_set_priv_access_map_addr,
.disable_verif_features =
gm20b_ctxsw_prog_disable_verif_features,
.check_main_image_header_magic =
gm20b_ctxsw_prog_check_main_image_header_magic,
.check_local_header_magic =
gm20b_ctxsw_prog_check_local_header_magic,
.get_num_gpcs = gm20b_ctxsw_prog_get_num_gpcs,
.get_num_tpcs = gm20b_ctxsw_prog_get_num_tpcs,
.get_extended_buffer_size_offset =
gm20b_ctxsw_prog_get_extended_buffer_size_offset,
.get_ppc_info = gm20b_ctxsw_prog_get_ppc_info,
.get_local_priv_register_ctl_offset =
gm20b_ctxsw_prog_get_local_priv_register_ctl_offset,
.hw_get_ts_tag_invalid_timestamp =
gm20b_ctxsw_prog_hw_get_ts_tag_invalid_timestamp,
.hw_get_ts_tag = gm20b_ctxsw_prog_hw_get_ts_tag,
.hw_record_ts_timestamp =
gm20b_ctxsw_prog_hw_record_ts_timestamp,
.hw_get_ts_record_size_in_bytes =
gm20b_ctxsw_prog_hw_get_ts_record_size_in_bytes,
.is_ts_valid_record = gm20b_ctxsw_prog_is_ts_valid_record,
.get_ts_buffer_aperture_mask =
gm20b_ctxsw_prog_get_ts_buffer_aperture_mask,
.set_ts_num_records = gm20b_ctxsw_prog_set_ts_num_records,
.set_ts_buffer_ptr = gm20b_ctxsw_prog_set_ts_buffer_ptr,
.set_pmu_options_boost_clock_frequencies =
gp10b_ctxsw_prog_set_pmu_options_boost_clock_frequencies,
.set_full_preemption_ptr =
gv11b_ctxsw_prog_set_full_preemption_ptr,
.set_full_preemption_ptr_veid0 =
gv11b_ctxsw_prog_set_full_preemption_ptr_veid0,
.hw_get_perf_counter_register_stride =
gv11b_ctxsw_prog_hw_get_perf_counter_register_stride,
.set_context_buffer_ptr =
gv11b_ctxsw_prog_set_context_buffer_ptr,
.set_type_per_veid_header =
gv11b_ctxsw_prog_set_type_per_veid_header,
.dump_ctxsw_stats = gp10b_ctxsw_prog_dump_ctxsw_stats,
}
},
.fb = {
.init_hw = NULL,
@@ -640,6 +725,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g)
gops->ltc = vgpu_gv11b_ops.ltc;
gops->ce2 = vgpu_gv11b_ops.ce2;
gops->gr = vgpu_gv11b_ops.gr;
gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog;
gops->fb = vgpu_gv11b_ops.fb;
gops->clock_gating = vgpu_gv11b_ops.clock_gating;
gops->fifo = vgpu_gv11b_ops.fifo;

View File

@@ -27,7 +27,6 @@
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/hw/gv11b/hw_ctxsw_prog_gv11b.h>
int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
{
@@ -41,8 +40,8 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
msg.handle = vgpu_get_handle(c->g);
p->ch_handle = c->virt_ctx;
p->ctx_header_va = __nvgpu_vm_alloc_va(c->vm,
ctxsw_prog_fecs_header_v(),
GMMU_PAGE_SIZE_KERNEL);
c->g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
GMMU_PAGE_SIZE_KERNEL);
if (!p->ctx_header_va) {
nvgpu_err(c->g, "alloc va failed for ctx_header");
return -ENOMEM;