diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1a9bee5f8..512d32e98 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -110,7 +110,6 @@ struct zcull_ctx_desc {
 struct pm_ctx_desc {
 	struct mem_desc mem;
 	u32 pm_mode;
-	bool ctx_was_enabled; /* Used in the virtual case only */
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 01f5e1a5a..65e3589bd 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -91,8 +91,10 @@ int vgpu_gr_init_ctx_state(struct gk20a *g)
 
 	g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size;
 	g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size;
+	g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size;
 	if (!g->gr.ctx_vars.golden_image_size ||
-		!g->gr.ctx_vars.zcull_ctxsw_image_size)
+		!g->gr.ctx_vars.zcull_ctxsw_image_size ||
+		!g->gr.ctx_vars.pm_ctxsw_image_size)
 		return -ENXIO;
 
 	gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
@@ -390,12 +392,13 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
 
 	/* check if hwpm was ever initialized. If not, nothing to do */
-	if (ch_ctx->pm_ctx.ctx_was_enabled == false)
+	if (pm_ctx->mem.gpu_va == 0)
 		return;
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
@@ -404,7 +407,8 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	ch_ctx->pm_ctx.ctx_was_enabled = false;
+	gk20a_vm_free_va(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size, 0);
+	pm_ctx->mem.gpu_va = 0;
 }
 
 static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
@@ -1019,27 +1023,34 @@ static int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	struct channel_gk20a *ch, bool enable)
 {
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
 	int err;
 
 	gk20a_dbg_fn("");
 
+	if (enable) {
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+
+		/* Allocate buffer if necessary */
+		if (pm_ctx->mem.gpu_va == 0) {
+			pm_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch->vm,
+					g->gr.ctx_vars.pm_ctxsw_image_size,
+					gmmu_page_size_kernel);
+
+			if (!pm_ctx->mem.gpu_va)
+				return -ENOMEM;
+			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
+		}
+	} else
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
 	msg.handle = vgpu_get_handle(g);
 	p->handle = ch->virt_ctx;
-
-	/* If we just enabled HWPM context switching, flag this
-	 * so we know we need to free the buffer when channel contexts
-	 * are cleaned up.
-	 */
-	if (enable) {
-		struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-		ch_ctx->pm_ctx.ctx_was_enabled = true;
-
-		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
-	} else
-		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	p->gpu_va = pm_ctx->mem.gpu_va;
 
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index be8b9ad18..456622a48 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -348,6 +348,7 @@ enum {
 
 struct tegra_vgpu_channel_set_ctxsw_mode {
 	u64 handle;
+	u64 gpu_va;
 	u32 mode;
 };
 
@@ -436,6 +437,7 @@ struct tegra_vgpu_constants_params {
 	 * TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC
 	 */
 	u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT];
+	u32 hwpm_ctx_size;
 };
 
 struct tegra_vgpu_channel_cyclestats_snapshot_params {