From fe27a7f934bc30de243e4c35a89e870d91fbd884 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Thu, 24 Jan 2019 15:01:52 +0530 Subject: [PATCH] gpu: nvgpu: add gr/ctx and gr/subctx APIs to set hwpm ctxsw mode gr_gk20a_update_hwpm_ctxsw_mode() currently validates the incoming hwpm mode, checks whether it is already set, and if not, sets the new hwpm mode by calling the g->ops.gr.ctxsw_prog HALs. Instead of programming the hwpm mode in gr_gk20a.c, move the programming to the gr/ctx and gr/subctx units by adding the APIs below: nvgpu_gr_ctx_prepare_hwpm_mode() - validate the incoming mode and check if it is already set; nvgpu_gr_ctx_set_hwpm_mode() - set pm mode in graphics context; nvgpu_gr_subctx_set_hwpm_mode() - set pm ptr in subcontext header. Add a gpu_va field to struct pm_ctx_desc to store the gpu_va to be programmed into the context. Rename NVGPU_DBG_HWPM_CTXSW_MODE_* to NVGPU_GR_CTX_HWPM_CTXSW_MODE_* and move them to gr/ctx.h. Remove the HALs below since they are no longer used: g->ops.gr.ctxsw_prog.set_pm_mode_no_ctxsw(), g->ops.gr.ctxsw_prog.set_pm_mode_ctxsw(), g->ops.gr.ctxsw_prog.set_pm_mode_stream_out_ctxsw(). Jira NVGPU-1527 Jira NVGPU-1613 Change-Id: Id2a4d498182ec0e3586dc7265f73a25870ca2ef7 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/2011093 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/ctx.c | 80 ++++++++++++ .../common/gr/ctxsw_prog/ctxsw_prog_gm20b.c | 17 --- .../common/gr/ctxsw_prog/ctxsw_prog_gm20b.h | 4 - .../common/gr/ctxsw_prog/ctxsw_prog_gv11b.c | 8 -- .../common/gr/ctxsw_prog/ctxsw_prog_gv11b.h | 2 - drivers/gpu/nvgpu/common/gr/subctx.c | 7 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 122 +++++------------- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 3 - drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 3 - drivers/gpu/nvgpu/gv100/hal_gv100.c | 5 - drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 5 - drivers/gpu/nvgpu/include/nvgpu/debugger.h | 8 -- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 6 - 
drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h | 15 +++ drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h | 3 + drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 7 +- drivers/gpu/nvgpu/tu104/hal_tu104.c | 5 - drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 3 - drivers/gpu/nvgpu/vgpu/gr_vgpu.c | 12 +- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 5 - 20 files changed, 147 insertions(+), 173 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index 233a8d493..fb9832470 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -673,3 +673,83 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, return err; } + +int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u32 mode, bool *skip_update) +{ + struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; + + *skip_update = false; + + if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { + nvgpu_err(g, "no graphics context allocated"); + return -EFAULT; + } + + if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && + (g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == NULL)) { + nvgpu_err(g, + "Mode-E hwpm context switch mode is not supported"); + return -EINVAL; + } + + switch (mode) { + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); + pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + break; + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = + g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); + pm_ctx->gpu_va = 0; + break; + case NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: + if (pm_ctx->pm_mode == + g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { + *skip_update = true; + return 0; + } + pm_ctx->pm_mode = + 
g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw(); + pm_ctx->gpu_va = pm_ctx->mem.gpu_va; + break; + default: + nvgpu_err(g, "invalid hwpm context switch mode"); + return -EINVAL; + } + + return 0; +} + +int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool set_pm_ptr) +{ + int err; + + /* Channel gr_ctx buffer is gpu cacheable. + Flush and invalidate before cpu update. */ + err = g->ops.mm.l2_flush(g, true); + if (err != 0) { + nvgpu_err(g, "l2_flush failed"); + return err; + } + + g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem, + gr_ctx->pm_ctx.pm_mode); + if (set_pm_ptr) { + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &gr_ctx->mem, + gr_ctx->pm_ctx.gpu_va); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c index da66a1b7d..a95908a92 100644 --- a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.c @@ -141,23 +141,6 @@ void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); } -u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g, - struct nvgpu_mem *ctx_mem) -{ - gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, - ctxsw_prog_main_image_pm_mode_no_ctxsw_f()); - return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); -} - -u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g, - struct nvgpu_mem *ctx_mem) -{ - gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, - ctxsw_prog_main_image_pm_mode_ctxsw_f()); - return ctxsw_prog_main_image_pm_mode_ctxsw_f(); -} - - u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void) { return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h index fc2be9728..fd3ce60dc 100644 --- a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h +++ 
b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gm20b.h @@ -52,10 +52,6 @@ void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g, struct nvgpu_mem *ctx_mem, u32 mode); void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, struct nvgpu_mem *ctx_mem, bool enable); -u32 gm20b_ctxsw_prog_set_pm_mode_no_ctxsw(struct gk20a *g, - struct nvgpu_mem *ctx_mem); -u32 gm20b_ctxsw_prog_set_pm_mode_ctxsw(struct gk20a *g, - struct nvgpu_mem *ctx_mem); u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void); u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void); void gm20b_ctxsw_prog_init_ctxsw_hdr_data(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c index aecc9cd4d..a01fd7c42 100644 --- a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.c @@ -54,14 +54,6 @@ u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void) return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); } -u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct gk20a *g, - struct nvgpu_mem *ctx_mem) -{ - gm20b_ctxsw_prog_set_pm_mode(g, ctx_mem, - ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f()); - return ctxsw_prog_main_image_pm_mode_stream_out_ctxsw_f(); -} - void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, u64 addr) { diff --git a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h index 852069979..3a5d0abb5 100644 --- a/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h +++ b/drivers/gpu/nvgpu/common/gr/ctxsw_prog/ctxsw_prog_gv11b.h @@ -30,8 +30,6 @@ void gv11b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, void gv11b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, u64 addr); u32 gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw(void); -u32 gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw(struct 
gk20a *g, - struct nvgpu_mem *ctx_mem); void gv11b_ctxsw_prog_set_full_preemption_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, u64 addr); void gv11b_ctxsw_prog_set_full_preemption_ptr_veid0(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index f9c69bdaa..e3ad439e3 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -117,3 +117,10 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &subctx->ctx_header, gr_ctx->zcull_ctx.gpu_va); } + +void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx) +{ + g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header, + gr_ctx->pm_ctx.gpu_va); +} diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 184b1bc25..76eeb631a 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -1425,10 +1425,8 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, u32 mode) { struct tsg_gk20a *tsg; - struct nvgpu_mem *gr_mem = NULL; struct nvgpu_gr_ctx *gr_ctx; - struct pm_ctx_desc *pm_ctx; - u64 virt_addr = 0; + bool skip_update = false; int ret; nvgpu_log_fn(g, " "); @@ -1439,43 +1437,33 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, } gr_ctx = tsg->gr_ctx; - pm_ctx = &gr_ctx->pm_ctx; - gr_mem = &gr_ctx->mem; - if (!nvgpu_mem_is_valid(gr_mem)) { - nvgpu_err(g, "no graphics context allocated"); - return -EFAULT; + + if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { + nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, + NVGPU_GR_CTX_PM_CTX, + g->gr.ctx_vars.pm_ctxsw_image_size); + + ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, + g->gr.gr_ctx_desc, c->vm, + gpu_va); + if (ret != 0) { + nvgpu_err(g, + "failed to allocate pm ctxt buffer"); + return ret; + } + + if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && + 
(g->ops.gr.init_hwpm_pmm_register != NULL)) { + g->ops.gr.init_hwpm_pmm_register(g); + } } - if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && - (g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw == - NULL)) { - nvgpu_err(g, - "Mode-E hwpm context switch mode is not supported"); - return -EINVAL; + ret = nvgpu_gr_ctx_prepare_hwpm_mode(g, gr_ctx, mode, &skip_update); + if (ret != 0) { + return ret; } - - switch (mode) { - case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: - if (pm_ctx->pm_mode == - g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw()) { - return 0; - } - break; - case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: - if (pm_ctx->pm_mode == - g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { - return 0; - } - break; - case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: - if (pm_ctx->pm_mode == - g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { - return 0; - } - break; - default: - nvgpu_err(g, "invalid hwpm context switch mode"); - return -EINVAL; + if (skip_update) { + return 0; } ret = gk20a_disable_channel_tsg(g, c); @@ -1491,72 +1479,26 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, return ret; } - /* Channel gr_ctx buffer is gpu cacheable. - Flush and invalidate before cpu update. 
*/ - ret = g->ops.mm.l2_flush(g, true); - if (ret != 0) { - nvgpu_err(g, "l2_flush failed"); - return ret; - } - - if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { - /* Allocate buffer if necessary */ - if (pm_ctx->mem.gpu_va == 0ULL) { - nvgpu_gr_ctx_set_size(g->gr.gr_ctx_desc, - NVGPU_GR_CTX_PM_CTX, - g->gr.ctx_vars.pm_ctxsw_image_size); - - ret = nvgpu_gr_ctx_alloc_pm_ctx(g, gr_ctx, - g->gr.gr_ctx_desc, c->vm, - gpu_va); - if (ret != 0) { - c->g->ops.fifo.enable_channel(c); - nvgpu_err(g, - "failed to allocate pm ctxt buffer"); - return ret; - } - } - - if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && - (g->ops.gr.init_hwpm_pmm_register != NULL)) { - g->ops.gr.init_hwpm_pmm_register(g); - } - } - - switch (mode) { - case NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW: - pm_ctx->pm_mode = - g->ops.gr.ctxsw_prog.set_pm_mode_ctxsw(g, gr_mem); - virt_addr = pm_ctx->mem.gpu_va; - break; - case NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: - pm_ctx->pm_mode = - g->ops.gr.ctxsw_prog.set_pm_mode_stream_out_ctxsw(g, gr_mem); - virt_addr = pm_ctx->mem.gpu_va; - break; - case NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW: - pm_ctx->pm_mode = - g->ops.gr.ctxsw_prog.set_pm_mode_no_ctxsw(g, gr_mem); - virt_addr = 0; - } - if (c->subctx != NULL) { struct channel_gk20a *ch; nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { - g->ops.gr.ctxsw_prog.set_pm_ptr(g, &ch->subctx->ctx_header, - virt_addr); + ret = nvgpu_gr_ctx_set_hwpm_mode(g, gr_ctx, false); + if (ret == 0) { + nvgpu_gr_subctx_set_hwpm_mode(g, ch->subctx, + gr_ctx); + } } nvgpu_rwsem_up_read(&tsg->ch_list_lock); } else { - g->ops.gr.ctxsw_prog.set_pm_ptr(g, gr_mem, virt_addr); + ret = nvgpu_gr_ctx_set_hwpm_mode(g, gr_ctx, true); } /* enable channel */ gk20a_enable_channel_tsg(g, c); - return 0; + return ret; } static void gr_gk20a_start_falcon_ucode(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 
b52d4133a..2296c27e3 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -364,9 +364,6 @@ static const struct gpu_ops gm20b_ops = { .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index e1b43badb..4dc1d5c61 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -403,9 +403,6 @@ static const struct gpu_ops gp10b_ops = { .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 34edc3a80..09a0d5146 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -522,16 +522,11 @@ static const struct gpu_ops gv100_ops = { .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, .hw_get_pm_mode_stream_out_ctxsw = gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, - 
.set_pm_mode_stream_out_ctxsw = - gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, .set_compute_preemption_mode_cta = gp10b_ctxsw_prog_set_compute_preemption_mode_cta, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index c9ae4ec8c..2bfaddec0 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -483,16 +483,11 @@ static const struct gpu_ops gv11b_ops = { .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, .hw_get_pm_mode_stream_out_ctxsw = gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, - .set_pm_mode_stream_out_ctxsw = - gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, .set_compute_preemption_mode_cta = gp10b_ctxsw_prog_set_compute_preemption_mode_cta, diff --git a/drivers/gpu/nvgpu/include/nvgpu/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/debugger.h index 04921b676..438df2fb9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/debugger.h +++ b/drivers/gpu/nvgpu/include/nvgpu/debugger.h @@ -134,12 +134,4 @@ void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s); u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s, bool mode); - /* PM Context Switch Mode */ -/*This mode says that the pms are not to be context switched. */ -#define NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000U) -/* This mode says that the pms in Mode-B are to be context switched */ -#define NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW (0x00000001U) -/* This mode says that the pms in Mode-E (stream out) are to be context switched. 
*/ -#define NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002U) - #endif /* NVGPU_DEBUGGER_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 2db768c29..334ffbf33 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -559,12 +559,6 @@ struct gpu_ops { struct nvgpu_mem *ctx_mem, u32 mode); void (*set_pm_smpc_mode)(struct gk20a *g, struct nvgpu_mem *ctx_mem, bool enable); - u32 (*set_pm_mode_no_ctxsw)(struct gk20a *g, - struct nvgpu_mem *ctx_mem); - u32 (*set_pm_mode_ctxsw)(struct gk20a *g, - struct nvgpu_mem *ctx_mem); - u32 (*set_pm_mode_stream_out_ctxsw)(struct gk20a *g, - struct nvgpu_mem *ctx_mem); u32 (*hw_get_pm_mode_no_ctxsw)(void); u32 (*hw_get_pm_mode_ctxsw)(void); u32 (*hw_get_pm_mode_stream_out_ctxsw)(void); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h index 49c95f246..4e18b33ec 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/ctx.h @@ -73,8 +73,17 @@ struct zcull_ctx_desc { u32 ctx_sw_mode; }; + /* PM Context Switch Mode */ +/*This mode says that the pms are not to be context switched. */ +#define NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW (0x00000000U) +/* This mode says that the pms in Mode-B are to be context switched */ +#define NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW (0x00000001U) +/* This mode says that the pms in Mode-E (stream out) are to be context switched. 
*/ +#define NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW (0x00000002U) + struct pm_ctx_desc { struct nvgpu_mem mem; + u64 gpu_va; u32 pm_mode; }; @@ -185,4 +194,10 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, bool enable); + +int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + u32 mode, bool *skip_update); +int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, + bool set_pm_ptr); + #endif /* NVGPU_INCLUDE_GR_CTX_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h index 8400ddffd..85f156ae8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/subctx.h @@ -46,4 +46,7 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); +void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g, + struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx); + #endif /* NVGPU_GR_SUBCTX_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index 03a851094..9765c2f5f 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -1007,11 +1008,11 @@ static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode) nvgpu_speculation_barrier(); switch (mode){ case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW: - return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW; + return NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW; case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW: - return NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW; + return NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW; case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW: - return NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW; + return NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW; } 
return mode; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 235159877..7f311cac8 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -547,16 +547,11 @@ static const struct gpu_ops tu104_ops = { .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, .hw_get_pm_mode_stream_out_ctxsw = gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, - .set_pm_mode_stream_out_ctxsw = - gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, .set_compute_preemption_mode_cta = gm20b_ctxsw_prog_set_compute_preemption_mode_cta, diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 0e5963c39..e67d33433 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -256,9 +256,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index 2ae690910..b2c692d5c 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -1125,7 +1125,7 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, ch_ctx = tsg->gr_ctx; 
pm_ctx = &ch_ctx->pm_ctx; - if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) { + if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW) { /* * send command to enable HWPM only once - otherwise server * will return an error due to using the same GPU VA twice. @@ -1136,13 +1136,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, return 0; } p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; - } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { + } else if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { if (pm_ctx->pm_mode == g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw()) { return 0; } p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; - } else if ((mode == NVGPU_DBG_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && + } else if ((mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW) && g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { if (pm_ctx->pm_mode == g->ops.gr.ctxsw_prog.hw_get_pm_mode_stream_out_ctxsw()) { @@ -1154,7 +1154,7 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, return -EINVAL; } - if (mode != NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { + if (mode != NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { /* Allocate buffer if necessary */ if (pm_ctx->mem.gpu_va == 0) { pm_ctx->mem.gpu_va = nvgpu_vm_alloc_va(ch->vm, @@ -1177,10 +1177,10 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, WARN_ON(err || msg.ret); err = err ? 
err : msg.ret; if (!err) { - if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_CTXSW) { + if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW) { pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_ctxsw(); - } else if (mode == NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW) { + } else if (mode == NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW) { pm_ctx->pm_mode = g->ops.gr.ctxsw_prog.hw_get_pm_mode_no_ctxsw(); } else { diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index f9b272279..444fb29ee 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -290,16 +290,11 @@ static const struct gpu_ops vgpu_gv11b_ops = { .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr, .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode, .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode, - .set_pm_mode_no_ctxsw = - gm20b_ctxsw_prog_set_pm_mode_no_ctxsw, - .set_pm_mode_ctxsw = gm20b_ctxsw_prog_set_pm_mode_ctxsw, .hw_get_pm_mode_no_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw, .hw_get_pm_mode_ctxsw = gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw, .hw_get_pm_mode_stream_out_ctxsw = gv11b_ctxsw_prog_hw_get_pm_mode_stream_out_ctxsw, - .set_pm_mode_stream_out_ctxsw = - gv11b_ctxsw_prog_set_pm_mode_stream_out_ctxsw, .init_ctxsw_hdr_data = gp10b_ctxsw_prog_init_ctxsw_hdr_data, .set_compute_preemption_mode_cta = gp10b_ctxsw_prog_set_compute_preemption_mode_cta,