diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 1a9bee5f8..512d32e98 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -110,7 +110,6 @@ struct zcull_ctx_desc {
 struct pm_ctx_desc {
 	struct mem_desc mem;
 	u32 pm_mode;
-	bool ctx_was_enabled; /* Used in the virtual case only */
 };
 
 struct gk20a;
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 01f5e1a5a..65e3589bd 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -91,8 +91,10 @@ int vgpu_gr_init_ctx_state(struct gk20a *g)
 
 	g->gr.ctx_vars.golden_image_size = priv->constants.golden_ctx_size;
 	g->gr.ctx_vars.zcull_ctxsw_image_size = priv->constants.zcull_ctx_size;
+	g->gr.ctx_vars.pm_ctxsw_image_size = priv->constants.hwpm_ctx_size;
 	if (!g->gr.ctx_vars.golden_image_size ||
-		!g->gr.ctx_vars.zcull_ctxsw_image_size)
+		!g->gr.ctx_vars.zcull_ctxsw_image_size ||
+		!g->gr.ctx_vars.pm_ctxsw_image_size)
 		return -ENXIO;
 
 	gr->ctx_vars.buffer_size = g->gr.ctx_vars.golden_image_size;
@@ -390,12 +392,13 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	int err;
 
 	gk20a_dbg_fn("");
 
 	/* check if hwpm was ever initialized. If not, nothing to do */
-	if (ch_ctx->pm_ctx.ctx_was_enabled == false)
+	if (pm_ctx->mem.gpu_va == 0)
 		return;
 
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX;
@@ -404,7 +407,8 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	ch_ctx->pm_ctx.ctx_was_enabled = false;
+	gk20a_vm_free_va(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size, 0);
+	pm_ctx->mem.gpu_va = 0;
 }
 
 static void vgpu_gr_free_channel_ctx(struct channel_gk20a *c)
@@ -1019,27 +1023,34 @@ static int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 	struct channel_gk20a *ch, bool enable)
 {
+	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
 	int err;
 
 	gk20a_dbg_fn("");
 
+	if (enable) {
+		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
+
+		/* Allocate buffer if necessary */
+		if (pm_ctx->mem.gpu_va == 0) {
+			pm_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch->vm,
+					g->gr.ctx_vars.pm_ctxsw_image_size,
+					gmmu_page_size_kernel);
+
+			if (!pm_ctx->mem.gpu_va)
+				return -ENOMEM;
+			pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
+		}
+	} else
+		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+
 	msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
 	msg.handle = vgpu_get_handle(g);
 	p->handle = ch->virt_ctx;
-
-	/* If we just enabled HWPM context switching, flag this
-	 * so we know we need to free the buffer when channel contexts
-	 * are cleaned up.
-	 */
-	if (enable) {
-		struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
-		ch_ctx->pm_ctx.ctx_was_enabled = true;
-
-		p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
-	} else
-		p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
+	p->gpu_va = pm_ctx->mem.gpu_va;
 
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
diff --git a/include/linux/tegra_vgpu.h b/include/linux/tegra_vgpu.h
index be8b9ad18..456622a48 100644
--- a/include/linux/tegra_vgpu.h
+++ b/include/linux/tegra_vgpu.h
@@ -348,6 +348,7 @@ enum {
 
 struct tegra_vgpu_channel_set_ctxsw_mode {
 	u64 handle;
+	u64 gpu_va;
 	u32 mode;
 };
 
@@ -436,6 +437,7 @@ struct tegra_vgpu_constants_params {
 	 * TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC
 	 */
 	u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT];
+	u32 hwpm_ctx_size;
 };
 
 struct tegra_vgpu_channel_cyclestats_snapshot_params {