gpu: nvgpu: enable HWPM Mode-E context switch
- Write new pm mode to context buffer header. Ucode uses this mode to
  enable Mode-E context switch. This is Mode-B context switch of PMs
  with Mode-E streamout on one context. If this mode is set, Ucode
  makes sure that the Mode-E pipe (perfmons, routers, pma) is idle
  before it context switches PMs.
- This allows us to collect counters in a secure way (i.e. on a
  per-context basis) with stream out.

Bug 2106999

Change-Id: I5a7435f09d1bf053ca428e538b0a57f3a175ac37
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760366
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 4cd59404a2
commit e14fdcd8f1
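For context, here is a minimal user-space sketch of how a debugger session might request the new Mode-E behaviour through the hwpm ctxsw mode ioctl whose handler is updated below. The <linux/nvgpu.h> include path and the NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE macro name are assumptions for illustration; the args struct and the STREAM_OUT_CTXSW mode value come from this change.

/* Hypothetical caller: request Mode-E (stream out) PM context switching
 * on an already-bound nvgpu debugger session fd. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header location */

static int request_mode_e_ctxsw(int dbg_fd)
{
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args args = {
		.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW,
	};

	/* Chips whose HAL lacks gr.get_hw_accessor_stream_out_mode reject this with -EINVAL. */
	if (ioctl(dbg_fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &args) != 0) {
		perror("HWPM ctxsw mode ioctl");
		return -1;
	}
	return 0;
}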
@@ -135,4 +135,13 @@ int gk20a_perfbuf_disable_locked(struct gk20a *g);
 void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s);
 u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
 		bool mode);
+
+/* PM Context Switch Mode */
+/*This mode says that the pms are not to be context switched. */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
+/* This mode says that the pms in Mode-B are to be context switched */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001)
+/* This mode says that the pms in Mode-E (stream out) are to be context switched. */
+#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
+
 #endif /* DBG_GPU_GK20A_H */
@@ -307,10 +307,11 @@ struct gpu_ops {
 	int (*update_smpc_ctxsw_mode)(struct gk20a *g,
 				      struct channel_gk20a *c,
 				      bool enable);
+	u32 (*get_hw_accessor_stream_out_mode)(void);
 	int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
 				      struct channel_gk20a *c,
 				      u64 gpu_va,
-				      bool enable);
+				      u32 mode);
 	int (*dump_gr_regs)(struct gk20a *g,
 			    struct gk20a_debug_output *o);
 	int (*update_pc_sampling)(struct channel_gk20a *ch,
@@ -1684,14 +1684,14 @@ out:
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				  struct channel_gk20a *c,
 				  u64 gpu_va,
-				  bool enable_hwpm_ctxsw)
+				  u32 mode)
 {
 	struct tsg_gk20a *tsg;
 	struct nvgpu_mem *gr_mem = NULL;
 	struct nvgpu_gr_ctx *gr_ctx;
 	struct pm_ctx_desc *pm_ctx;
 	u32 data;
-	u64 virt_addr;
+	u64 virt_addr = 0;
 	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
 	int ret;
@@ -1710,12 +1710,31 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		return -EFAULT;
 	}
 
-	if (enable_hwpm_ctxsw) {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
+	if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
+			(!g->ops.gr.get_hw_accessor_stream_out_mode)) {
+		nvgpu_err(g, "Mode-E hwpm context switch mode is not supported");
+		return -EINVAL;
+	}
+
+	switch (mode) {
+	case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
 			return 0;
-	} else {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
+		}
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
 			return 0;
+		}
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
+			return 0;
+		}
+		break;
+	default:
+		nvgpu_err(g, "invalid hwpm context switch mode");
+		return -EINVAL;
 	}
 
 	ret = gk20a_disable_channel_tsg(g, c);
@@ -1735,7 +1754,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (enable_hwpm_ctxsw) {
+	if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
 		/* Allocate buffer if necessary */
 		if (pm_ctx->mem.gpu_va == 0) {
 			ret = nvgpu_dma_alloc_sys(g,
@@ -1768,11 +1787,16 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
 
-	if (enable_hwpm_ctxsw) {
+	switch (mode) {
+	case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW:
 		pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
-
 		virt_addr = pm_ctx->mem.gpu_va;
-	} else {
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
+		virt_addr = pm_ctx->mem.gpu_va;
+		break;
+	case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW:
 		pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
 		virt_addr = 0;
 	}
@@ -1892,7 +1916,7 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	 * for PM context switching, including mode and possibly a pointer to
 	 * the PM backing store.
 	 */
-	if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
+	if (gr_ctx->pm_ctx.pm_mode != ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
 		if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
 			nvgpu_err(g,
 				"context switched pm with no pm buffer!");
@@ -655,7 +655,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 				  struct channel_gk20a *c,
 				  u64 gpu_va,
-				  bool enable_hwpm_ctxsw);
+				  u32 mode);
 
 struct nvgpu_gr_ctx;
 void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
@@ -38,6 +38,7 @@
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>
 #include <nvgpu/hw/gv100/hw_fuse_gv100.h>
 #include <nvgpu/hw/gv100/hw_top_gv100.h>
+#include <nvgpu/hw/gv100/hw_ctxsw_prog_gv100.h>
 
 
 /*
@@ -453,3 +454,8 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 		}
 	}
 }
+
+u32 gr_gv100_get_hw_accessor_stream_out_mode()
+{
+	return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f();
+}
@@ -47,4 +47,5 @@ void gr_gv100_split_fbpa_broadcast_addr(struct gk20a *g, u32 addr,
 			u32 num_fbpas,
 			u32 *priv_addr_table, u32 *t);
 void gr_gv100_init_gpc_mmu(struct gk20a *g);
+u32 gr_gv100_get_hw_accessor_stream_out_mode(void);
 #endif
@@ -358,6 +358,8 @@ static const struct gpu_ops gv100_ops = {
 		.enable_exceptions = gr_gv11b_enable_exceptions,
 		.get_lrf_tex_ltc_dram_override = get_ecc_override_val,
 		.update_smpc_ctxsw_mode = gr_gk20a_update_smpc_ctxsw_mode,
+		.get_hw_accessor_stream_out_mode =
+			gr_gv100_get_hw_accessor_stream_out_mode,
 		.update_hwpm_ctxsw_mode = gr_gk20a_update_hwpm_ctxsw_mode,
 		.record_sm_error_state = gv11b_gr_record_sm_error_state,
 		.update_sm_error_state = gv11b_gr_update_sm_error_state,
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -144,6 +144,10 @@ static inline u32 ctxsw_prog_main_image_pm_mode_no_ctxsw_f(void)
 {
 	return 0x0U;
 }
+static inline u32 ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(void)
+{
+	return 0x2U;
+}
 static inline u32 ctxsw_prog_main_image_pm_smpc_mode_m(void)
 {
 	return 0x7U << 3U;
@@ -356,6 +356,7 @@ struct tegra_vgpu_fecs_trace_filter {
 enum {
 	TEGRA_VGPU_CTXSW_MODE_NO_CTXSW = 0,
 	TEGRA_VGPU_CTXSW_MODE_CTXSW,
+	TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW,
 };
 
 enum {
@@ -1039,12 +1039,33 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	return err;
 }
 
+/*
+ * Convert linux hwpm ctxsw mode type of the form of NVGPU_DBG_GPU_HWPM_CTXSW_MODE_*
+ * into common hwpm ctxsw mode type of the form of NVGPU_DBG_HWPM_CTXSW_MODE_*
+ */
+
+static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
+{
+	switch (mode){
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW;
+	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW:
+		return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW;
+	}
+
+	return mode;
+}
+
+
 static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
 {
 	int err;
 	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch_gk20a;
+	u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
 
 	nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
@@ -1080,7 +1101,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 	err = g->ops.gr.update_hwpm_ctxsw_mode(g, ch_gk20a, 0,
-		args->mode == NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW);
+		mode);
 
 	if (err)
 		nvgpu_err(g,
 			"error (%d) during pm ctxsw mode update", err);
@@ -1066,7 +1066,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 }
 
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-	struct channel_gk20a *ch, u64 gpu_va, bool enable)
+	struct channel_gk20a *ch, u64 gpu_va, u32 mode)
 {
 	struct tsg_gk20a *tsg;
 	struct nvgpu_gr_ctx *ch_ctx;
@@ -1089,16 +1089,33 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	ch_ctx = &tsg->gr_ctx;
 	pm_ctx = &ch_ctx->pm_ctx;
 
-	if (enable) {
+	if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
 		/*
 		 * send command to enable HWPM only once - otherwise server
 		 * will return an error due to using the same GPU VA twice.
 		 */
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
 			return 0;
+		}
 		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+	} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
+		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) {
+			return 0;
+		}
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	} else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) &&
+			(g->ops.gr.get_hw_accessor_stream_out_mode)){
+		if (pm_ctx->pm_mode == g->ops.gr.get_hw_accessor_stream_out_mode()) {
+			return 0;
+		}
+		p->mode = TEGRA_VGPU_CTXSW_MODE_STREAM_OUT_CTXSW;
+	} else {
+		nvgpu_err(g, "invalid hwpm context switch mode");
+		return -EINVAL;
+	}
+
+	if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
 		/* Allocate buffer if necessary */
 		if (pm_ctx->mem.gpu_va == 0) {
 			pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
@@ -1109,11 +1126,6 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 				return -ENOMEM;
 			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
 		}
-	} else {
-		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
-			return 0;
-
-		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
 	}
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
@@ -1124,10 +1136,15 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 	err = err ? err : msg.ret;
-	if (!err)
-		pm_ctx->pm_mode = enable ?
-			ctxsw_prog_main_image_pm_mode_ctxsw_f() :
-			ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+	if (!err) {
+		if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) {
+			pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_ctxsw_f();
+		} else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) {
+			pm_ctx->pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+		} else {
+			pm_ctx->pm_mode = g->ops.gr.get_hw_accessor_stream_out_mode();
+		}
+	}
 
 	return err;
 }
@@ -58,7 +58,7 @@ int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
 int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 			struct channel_gk20a *ch, bool enable);
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-			struct channel_gk20a *ch, u64 gpu_va, bool enable);
+			struct channel_gk20a *ch, u64 gpu_va, u32 mode);
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,
 			struct channel_gk20a *ch, u32 sm_id);
 int vgpu_gr_suspend_contexts(struct gk20a *g,
@@ -1286,8 +1286,12 @@ struct nvgpu_dbg_gpu_set_next_stop_trigger_type_args {
 
 
 /* PM Context Switch Mode */
+/*This mode says that the pms are not to be context switched. */
 #define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000)
+/* This mode says that the pms in Mode-B are to be context switched */
 #define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW (0x00000001)
+/* This mode says that the pms in Mode-E (stream out) are to be context switched. */
+#define NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002)
 
 struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args {
 	__u32 mode;