diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index 672d6a7c6..551825a6b 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -1057,7 +1057,7 @@ u32 nvgpu_gr_get_syspipe_id(struct gk20a *g, u32 gr_instance_id)
  */
 int nvgpu_gr_disable_ctxsw(struct gk20a *g)
 {
-	struct nvgpu_gr *gr = g->gr;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
@@ -1104,7 +1104,7 @@ out:
 /* Start processing (continue) context switches at FECS */
 int nvgpu_gr_enable_ctxsw(struct gk20a *g)
 {
-	struct nvgpu_gr *gr = g->gr;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
@@ -1158,7 +1158,9 @@ void nvgpu_gr_sw_ready(struct gk20a *g, bool enable)
 
 /* Wait until GR is initialized */
 void nvgpu_gr_wait_initialized(struct gk20a *g)
 {
-	NVGPU_COND_WAIT(&g->gr->init_wq, g->gr->initialized, 0U);
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+
+	NVGPU_COND_WAIT(&gr->init_wq, gr->initialized, 0U);
 }
 #endif
diff --git a/drivers/gpu/nvgpu/common/gr/gr_utils.c b/drivers/gpu/nvgpu/common/gr/gr_utils.c
index dbfc7e0db..1242c53be 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_utils.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_utils.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -111,21 +111,29 @@ void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g)
 struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
 	struct gk20a *g)
 {
-	return g->gr->golden_image;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+
+	return gr->golden_image;
 }
 
 struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g)
 {
-	return g->gr->hwpm_map;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+
+	return gr->hwpm_map;
 }
 
 void nvgpu_gr_reset_falcon_ptr(struct gk20a *g)
 {
-	g->gr->falcon = NULL;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+
+	gr->falcon = NULL;
 }
 
 void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g)
 {
-	g->gr->golden_image = NULL;
+	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+
+	gr->golden_image = NULL;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/common/profiler/profiler.c b/drivers/gpu/nvgpu/common/profiler/profiler.c
index f3e1eb5dc..90eab9777 100644
--- a/drivers/gpu/nvgpu/common/profiler/profiler.c
+++ b/drivers/gpu/nvgpu/common/profiler/profiler.c
@@ -35,6 +35,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
 #include "nvgpu_next_gpuid.h"
@@ -51,7 +53,8 @@ static int generate_unique_id(void)
 
 int nvgpu_profiler_alloc(struct gk20a *g,
 	struct nvgpu_profiler_object **_prof,
-	enum nvgpu_profiler_pm_reservation_scope scope)
+	enum nvgpu_profiler_pm_reservation_scope scope,
+	u32 gpu_instance_id)
 {
 	struct nvgpu_profiler_object *prof;
 	*_prof = NULL;
@@ -65,6 +68,7 @@ int nvgpu_profiler_alloc(struct gk20a *g,
 
 	prof->prof_handle = generate_unique_id();
 	prof->scope = scope;
+	prof->gpu_instance_id = gpu_instance_id;
 	prof->g = g;
 
 	nvgpu_mutex_init(&prof->ioctl_lock);
@@ -89,6 +93,7 @@ void nvgpu_profiler_free(struct nvgpu_profiler_object *prof)
 	nvgpu_profiler_free_pma_stream(prof);
 
 	nvgpu_list_del(&prof->prof_obj_entry);
+	prof->gpu_instance_id = 0U;
 
 	nvgpu_kfree(g, prof);
 }
@@ -297,6 +302,8 @@ static int nvgpu_profiler_bind_smpc(struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
 	int err;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
 
 	if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
 		if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC]) {
@@ -306,11 +313,13 @@ static int nvgpu_profiler_bind_smpc(struct nvgpu_profiler_object *prof)
 		}
 
 		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) {
-			err = g->ops.gr.update_smpc_global_mode(g, false);
+			err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+				g->ops.gr.update_smpc_global_mode(g, false));
 		}
 	} else {
 		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) {
-			err = g->ops.gr.update_smpc_global_mode(g, true);
+			err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+				g->ops.gr.update_smpc_global_mode(g, true));
 		} else {
 			err = -EINVAL;
 		}
@@ -350,13 +359,17 @@ static int nvgpu_profiler_bind_hwpm(struct nvgpu_profiler_object *prof, bool str
 	int err = 0;
 	u32 mode = streamout ? NVGPU_GR_CTX_HWPM_CTXSW_MODE_STREAM_OUT_CTXSW :
 			NVGPU_GR_CTX_HWPM_CTXSW_MODE_CTXSW;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
 
 	if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
 		if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
-			err = g->ops.gr.update_hwpm_ctxsw_mode(g, prof->tsg, 0, mode);
+			err = g->ops.gr.update_hwpm_ctxsw_mode(
+					g, gr_instance_id, prof->tsg, 0, mode);
 		} else {
 			if (g->ops.gr.init_cau != NULL) {
-				g->ops.gr.init_cau(g);
+				nvgpu_gr_exec_for_instance(g, gr_instance_id,
+					g->ops.gr.init_cau(g));
 			}
 			if (g->ops.perf.reset_hwpm_pmm_registers != NULL) {
 				g->ops.perf.reset_hwpm_pmm_registers(g);
@@ -364,7 +377,8 @@ static int nvgpu_profiler_bind_hwpm(struct nvgpu_profiler_object *prof, bool str
 			g->ops.perf.init_hwpm_pmm_register(g);
 		}
 	} else {
-		err = g->ops.gr.update_hwpm_ctxsw_mode(g, prof->tsg, 0, mode);
+		err = g->ops.gr.update_hwpm_ctxsw_mode(
+				g, gr_instance_id, prof->tsg, 0, mode);
 	}
 
 	return err;
@@ -375,23 +389,49 @@ static int nvgpu_profiler_unbind_hwpm(struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
 	int err = 0;
 	u32 mode = NVGPU_GR_CTX_HWPM_CTXSW_MODE_NO_CTXSW;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
 
 	if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
 		if (prof->ctxsw[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
-			err = g->ops.gr.update_hwpm_ctxsw_mode(g, prof->tsg, 0, mode);
+			err = g->ops.gr.update_hwpm_ctxsw_mode(
+					g, gr_instance_id, prof->tsg, 0, mode);
 		}
 	} else {
-		err = g->ops.gr.update_hwpm_ctxsw_mode(g, prof->tsg, 0, mode);
+		err = g->ops.gr.update_hwpm_ctxsw_mode(
+				g, gr_instance_id, prof->tsg, 0, mode);
 	}
 
 	return err;
 }
 
+static void nvgpu_profiler_disable_cau_and_smpc(
+		struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+
+	/* Disable CAUs */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] &&
+		prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
+		g->ops.gr.disable_cau != NULL) {
+		g->ops.gr.disable_cau(g);
+	}
+
+	/* Disable SMPC */
+	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
+		g->ops.gr.disable_smpc != NULL) {
+		g->ops.gr.disable_smpc(g);
+	}
+
+}
+
 static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
 	u64 bytes_available;
 	int err = 0;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
 
 	nvgpu_log(g, gpu_dbg_prof,
 		"HWPM streamout quiesce in resident state started for handle %u",
@@ -405,18 +445,8 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_resident(struct nvgpu_profiler_
 		g->ops.perf.disable_all_perfmons(g);
 	}
 
-	/* Disable CAUs */
-	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY] &&
-		prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
-		g->ops.gr.disable_cau != NULL) {
-		g->ops.gr.disable_cau(g);
-	}
-
-	/* Disable SMPC */
-	if (prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC] &&
-		g->ops.gr.disable_smpc != NULL) {
-		g->ops.gr.disable_smpc(g);
-	}
+	nvgpu_gr_exec_for_instance(g, gr_instance_id,
+		nvgpu_profiler_disable_cau_and_smpc(prof));
 
 	/* Wait for routers to idle/quiescent */
 	err = g->ops.perf.wait_for_idle_pmm_routers(g);
@@ -481,11 +511,11 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct nvgpu_profi
 	return 0;
 }
 
-static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *prof)
+static int nvgpu_profiler_disable_ctxsw_and_check_is_tsg_ctx_resident(
+	struct nvgpu_profiler_object *prof)
 {
 	struct gk20a *g = prof->g;
-	bool ctx_resident;
-	int err, ctxsw_err;
+	int err;
 
 	err = nvgpu_gr_disable_ctxsw(g);
 	if (err != 0) {
@@ -493,7 +523,19 @@ static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *p
 		return err;
 	}
 
-	ctx_resident = g->ops.gr.is_tsg_ctx_resident(prof->tsg);
+	return g->ops.gr.is_tsg_ctx_resident(prof->tsg);
+}
+
+static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *prof)
+{
+	struct gk20a *g = prof->g;
+	bool ctx_resident;
+	int err, ctxsw_err;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
+
+	ctx_resident = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+		nvgpu_profiler_disable_ctxsw_and_check_is_tsg_ctx_resident(prof));
 
 	if (ctx_resident) {
 		err = nvgpu_profiler_quiesce_hwpm_streamout_resident(prof);
@@ -504,7 +546,8 @@ static int nvgpu_profiler_quiesce_hwpm_streamout(struct nvgpu_profiler_object *p
 		nvgpu_err(g, "Failed to quiesce HWPM streamout");
 	}
 
-	ctxsw_err = nvgpu_gr_enable_ctxsw(g);
+	ctxsw_err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+		nvgpu_gr_enable_ctxsw(g));
 	if (ctxsw_err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 		err = ctxsw_err;
diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
index af4ceff60..1f0eceb51 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
@@ -1,7 +1,7 @@
 /*
 * Virtualized GPU Graphics
 *
- * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -915,7 +915,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 }
 
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-	struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode)
+	u32 gr_instance_id, struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode)
 {
 	struct nvgpu_gr_ctx *gr_ctx;
 	struct tegra_vgpu_cmd_msg msg;
diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h
index c6bf6f4da..421a5b09b 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h
+++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -72,7 +72,7 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
 int vgpu_gr_set_sm_debug_mode(struct gk20a *g,
 	struct nvgpu_channel *ch, u64 sms, bool enable);
 int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
-	struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode);
+	u32 gr_instance_id, struct nvgpu_tsg *tsg, u64 gpu_va, u32 mode);
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,
 	struct nvgpu_channel *ch, u32 sm_id);
 int vgpu_gr_suspend_contexts(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index b4de0b006..8ee146fc5 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -77,6 +77,7 @@ out:
 }
 
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
+	u32 gr_instance_id,
 	struct nvgpu_tsg *tsg,
 	u64 gpu_va,
 	u32 mode)
@@ -86,7 +87,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	bool skip_update = false;
 	int err;
 	int ret;
-	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
+	struct nvgpu_gr *gr = nvgpu_gr_get_instance_ptr(g, gr_instance_id);
 
 	nvgpu_log_fn(g, " ");
 
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
index fd69e7fc6..7ff1ef7a5 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.h
@@ -1,7 +1,7 @@
 /*
 * GK20A Graphics Engine
 *
- * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -56,6 +56,7 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	struct nvgpu_tsg *tsg,
 	bool enable_smpc_ctxsw);
 int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
+	u32 gr_instance_id,
 	struct nvgpu_tsg *tsg,
 	u64 gpu_va,
 	u32 mode);
 int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/debugger.h b/drivers/gpu/nvgpu/include/nvgpu/debugger.h
index e48723bf6..ccbe5606d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/debugger.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/debugger.h
@@ -1,7 +1,7 @@
 /*
 * Tegra GK20A GPU Debugger Driver
 *
- * Copyright (c) 2013-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2013-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -76,6 +76,9 @@ struct dbg_session_gk20a {
 	 * profilers.
 	 */
 	struct nvgpu_profiler_object *prof;
+
+	/** GPU instance Id */
+	u32 gpu_instance_id;
 };
 
 struct dbg_session_data {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
index 4ea425389..71df59e49 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/gr.h
@@ -1140,6 +1140,7 @@ struct gops_gr {
 	int (*update_smpc_global_mode)(struct gk20a *g,
 				bool enable);
 	int (*update_hwpm_ctxsw_mode)(struct gk20a *g,
+				u32 gr_instance_id,
 				struct nvgpu_tsg *tsg,
 				u64 gpu_va,
 				u32 mode);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_instances.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_instances.h
index 355c94113..7ebc60f73 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_instances.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_instances.h
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef CONFIG_NVGPU_MIG
 #define nvgpu_gr_get_cur_instance_id(g) \
@@ -51,6 +52,12 @@
 		&g->gr[current_gr_instance_id]; \
 	})
 
+#define nvgpu_gr_get_instance_ptr(g, gr_instance_id) \
+	({ \
+		nvgpu_assert(gr_instance_id < g->num_gr_instances); \
+		&g->gr[gr_instance_id]; \
+	})
+
 #ifdef CONFIG_NVGPU_MIG
 #define nvgpu_gr_exec_for_each_instance(g, func) \
 	({ \
@@ -129,7 +136,11 @@
 		} \
 	})
 #else
-#define nvgpu_gr_exec_for_instance(g, gr_instance_id, func) (func)
+#define nvgpu_gr_exec_for_instance(g, gr_instance_id, func) \
+	({ \
+		nvgpu_assert(gr_instance_id == 0U); \
+		(func); \
+	})
 #endif
 
 #ifdef CONFIG_NVGPU_MIG
@@ -149,7 +160,10 @@
 	})
 #else
 #define nvgpu_gr_exec_with_ret_for_instance(g, gr_instance_id, func, type) \
-	(func)
+	({ \
+		nvgpu_assert(gr_instance_id == 0U); \
+		(func); \
+	})
 #endif
 
 #ifdef CONFIG_NVGPU_MIG
@@ -161,7 +175,11 @@
 		err; \
 	})
 #else
-#define nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, func) (func)
+#define nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id, func) \
+	({ \
+		nvgpu_assert(gr_instance_id == 0U); \
+		(func); \
+	})
 #endif
 
 #ifdef CONFIG_NVGPU_MIG
@@ -182,10 +200,11 @@
 		gr_config; \
 	})
 #else
-#define nvgpu_gr_get_gpu_instance_config_ptr(g, gr_instance_id) \
+#define nvgpu_gr_get_gpu_instance_config_ptr(g, gpu_instance_id) \
 	({ \
-		struct nvgpu_gr_config *gr_instance_gr_config = \
-			nvgpu_gr_get_config_ptr(g); \
+		struct nvgpu_gr_config *gr_instance_gr_config; \
+		nvgpu_assert(gpu_instance_id == 0U); \
+		gr_instance_gr_config = nvgpu_gr_get_config_ptr(g); \
 		gr_instance_gr_config; \
 	})
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/profiler.h b/drivers/gpu/nvgpu/include/nvgpu/profiler.h
index 3c2280b89..cd01b7a3b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/profiler.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/profiler.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -121,6 +121,9 @@ struct nvgpu_profiler_object {
 
 	/* NVGPU_DBG_REG_OP_TYPE_* for each HWPM resource */
 	u32 reg_op_type[NVGPU_HWPM_REGISTER_TYPE_COUNT];
+
+	/** GPU instance Id */
+	u32 gpu_instance_id;
 };
 
 static inline struct nvgpu_profiler_object *
@@ -132,7 +135,7 @@ nvgpu_profiler_object_from_prof_obj_entry(struct nvgpu_list_node *node)
 int nvgpu_profiler_alloc(struct gk20a *g,
 	struct nvgpu_profiler_object **_prof,
-	enum nvgpu_profiler_pm_reservation_scope scope);
+	enum nvgpu_profiler_pm_reservation_scope scope, u32 gpu_instance_id);
 
 void nvgpu_profiler_free(struct nvgpu_profiler_object *prof);
 
 int nvgpu_profiler_bind_context(struct nvgpu_profiler_object *prof,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index b63d28513..074f24f26 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -60,6 +60,11 @@
 
 #include "common/gr/ctx_priv.h"
 
+#include
+#include
+#include
+#include
+
 struct dbg_session_gk20a_linux {
 	struct device *dev;
 	struct dbg_session_gk20a dbg_s;
@@ -156,7 +161,7 @@ static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
 
 static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s);
 static int gk20a_dbg_gpu_do_dev_open(struct gk20a *g,
-		struct file *filp, bool is_profiler);
+		struct file *filp, u32 gpu_instance_id, bool is_profiler);
 static int nvgpu_dbg_get_context_buffer(struct gk20a *g,
 	struct nvgpu_mem *ctx_mem, void __user *ctx_buf, u32 ctx_buf_size);
 
@@ -225,6 +230,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 			nvgpu_profiler_free(prof_obj);
 		}
 	}
+	dbg_s->gpu_instance_id = 0U;
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
@@ -240,12 +246,14 @@ int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
 	struct nvgpu_cdev *cdev;
+	u32 gpu_instance_id;
 
 	cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
 	g = nvgpu_get_gk20a_from_cdev(cdev);
+	gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
-	return gk20a_dbg_gpu_do_dev_open(g, filp, true /* is profiler */);
+	return gk20a_dbg_gpu_do_dev_open(g, filp, gpu_instance_id, true /* is profiler */);
 }
 
 static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s,
@@ -274,6 +282,8 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 	struct nvgpu_tsg *tsg;
 	u32 sm_id;
 	int err = 0;
+	struct nvgpu_gr_config *gr_config =
+		nvgpu_gr_get_gpu_instance_config_ptr(g, dbg_s->gpu_instance_id);
 
 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
 	if (ch == NULL) {
@@ -287,7 +297,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 	}
 
 	sm_id = args->sm_id;
-	if (sm_id >= g->ops.gr.init.get_no_of_sm(g)) {
+	if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr_config)) {
 		return -EINVAL;
 	}
 
@@ -391,7 +401,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
 }
 
 static int gk20a_dbg_gpu_do_dev_open(struct gk20a *g,
-		struct file *filp, bool is_profiler)
+		struct file *filp, u32 gpu_instance_id, bool is_profiler)
 {
 	struct dbg_session_gk20a_linux *dbg_session_linux;
 	struct dbg_session_gk20a *dbg_s;
@@ -418,6 +428,7 @@ static int gk20a_dbg_gpu_do_dev_open(struct gk20a *g,
 	dbg_s->is_profiler = is_profiler;
 	dbg_s->is_pg_disabled = false;
 	dbg_s->is_timeout_disabled = false;
+	dbg_s->gpu_instance_id = gpu_instance_id;
 
 	nvgpu_cond_init(&dbg_s->dbg_events.wait_queue);
 	nvgpu_init_list_node(&dbg_s->ch_list);
@@ -845,6 +856,8 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	struct nvgpu_channel *ch;
 	struct nvgpu_tsg *tsg = NULL;
 	u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	nvgpu_log_fn(g, "%d ops, max fragment %d", args->num_ops,
 		g->dbg_regops_tmp_buf_ops);
@@ -938,8 +951,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		if (err)
 			break;
 
-		err = g->ops.regops.exec_regops(g, tsg, NULL,
-			g->dbg_regops_tmp_buf, num_ops, &flags);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.regops.exec_regops(g, tsg, NULL,
+			g->dbg_regops_tmp_buf, num_ops, &flags));
 
 		if (err) {
 			break;
@@ -1021,6 +1035,8 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	struct nvgpu_channel *ch_gk20a;
 	struct nvgpu_tsg *tsg;
 	bool global_mode = false;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	nvgpu_log_fn(g, "%s smpc ctxsw mode = %d", g->name, args->mode);
 
@@ -1046,8 +1062,9 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			goto clean_up;
 		}
 
-		err = g->ops.gr.update_smpc_global_mode(g,
-			args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.gr.update_smpc_global_mode(g,
+			args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW));
 		if (err) {
 			nvgpu_err(g,
 				"error (%d) during smpc global mode update", err);
@@ -1060,8 +1077,9 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			goto clean_up;
 		}
 
-		err = g->ops.gr.update_smpc_ctxsw_mode(g, tsg,
-			args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.gr.update_smpc_ctxsw_mode(g, tsg,
+			args->mode == NVGPU_DBG_GPU_SMPC_CTXSW_MODE_CTXSW));
 		if (err) {
 			nvgpu_err(g,
 				"error (%d) during smpc ctxsw mode update", err);
@@ -1105,6 +1123,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
 	struct nvgpu_profiler_object *prof_obj, *tmp_obj;
 	bool reserved = false;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
 
@@ -1155,8 +1175,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 		goto clean_up;
 	}
 
-	err = g->ops.gr.update_hwpm_ctxsw_mode(g, tsg, 0,
-			mode);
+	err = g->ops.gr.update_hwpm_ctxsw_mode(g, gr_instance_id, tsg, 0, mode);
 
 	if (err)
 		nvgpu_err(g,
@@ -1178,6 +1197,8 @@ static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
 	struct gk20a *g = dbg_s->g;
 	struct nvgpu_channel *ch;
 	bool enable = (args->mode == NVGPU_DBG_GPU_CTX_MMU_DEBUG_MODE_ENABLED);
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	nvgpu_log_fn(g, "mode=%u", args->mode);
 
@@ -1206,7 +1227,8 @@ static int nvgpu_dbg_gpu_ioctl_set_mmu_debug_mode(
 		goto clean_up;
 	}
 
-	err = nvgpu_tsg_set_mmu_debug_mode(ch, enable);
+	err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+		nvgpu_tsg_set_mmu_debug_mode(ch, enable));
 	if (err) {
 		nvgpu_err(g, "set mmu debug mode failed, err=%d", err);
 	}
@@ -1300,7 +1322,7 @@ static int nvgpu_ioctl_allocate_profiler_object(
 		scope = NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE;
 	}
 
-	err = nvgpu_profiler_alloc(g, &prof_obj, scope);
+	err = nvgpu_profiler_alloc(g, &prof_obj, scope, dbg_s->gpu_instance_id);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -1503,7 +1525,8 @@ static int nvgpu_perfbuf_reserve_pma(struct dbg_session_gk20a *dbg_s)
 
 	/* Legacy profiler only supports global PMA stream */
 	err = nvgpu_profiler_alloc(g, &dbg_s->prof,
-			NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE);
+			NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE,
+			dbg_s->gpu_instance_id);
 	if (err != 0) {
 		nvgpu_err(g, "Failed to allocate profiler object");
 		return err;
@@ -1625,6 +1648,8 @@ static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
 {
 	struct nvgpu_channel *ch;
 	struct gk20a *g = dbg_s->g;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
 	if (!ch)
@@ -1632,8 +1657,9 @@ static int gk20a_dbg_pc_sampling(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 
 	nvgpu_log_fn(g, " ");
 
-	return g->ops.gr.update_pc_sampling ?
-		g->ops.gr.update_pc_sampling(ch, args->enable) : -EINVAL;
+	return (g->ops.gr.update_pc_sampling ?
+		nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+		g->ops.gr.update_pc_sampling(ch, args->enable)) : -EINVAL);
 }
 
@@ -1644,6 +1670,10 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	u32 sm_id;
 	struct nvgpu_channel *ch;
 	int err = 0;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
+	struct nvgpu_gr_config *gr_config =
+		nvgpu_gr_get_gpu_instance_config_ptr(g, dbg_s->gpu_instance_id);
 
 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
 	if (ch == NULL) {
@@ -1651,7 +1681,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	}
 
 	sm_id = args->sm_id;
-	if (sm_id >= g->ops.gr.init.get_no_of_sm(g)) {
+	if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr_config)) {
 		return -EINVAL;
 	}
 
@@ -1662,8 +1692,9 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 		return err;
 	}
 
-	err = nvgpu_pg_elpg_protected_call(g,
-		g->ops.gr.clear_sm_error_state(g, ch, sm_id));
+	err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+		nvgpu_pg_elpg_protected_call(g,
+		g->ops.gr.clear_sm_error_state(g, ch, sm_id)));
 
 	gk20a_idle(g);
 
@@ -1677,6 +1708,8 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
 	struct gk20a *g = dbg_s->g;
 	int err = 0;
 	int ctx_resident_ch_fd = -1;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
 	err = gk20a_busy(g);
 	if (err)
@@ -1685,13 +1718,15 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
 	nvgpu_speculation_barrier();
 	switch (args->action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
-		err = g->ops.gr.suspend_contexts(g, dbg_s,
-			&ctx_resident_ch_fd);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.gr.suspend_contexts(g, dbg_s,
+			&ctx_resident_ch_fd));
 		break;
 
 	case NVGPU_DBG_GPU_RESUME_ALL_CONTEXTS:
-		err = g->ops.gr.resume_contexts(g, dbg_s,
-			&ctx_resident_ch_fd);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.gr.resume_contexts(g, dbg_s,
+			&ctx_resident_ch_fd));
 		break;
 	}
 
@@ -2691,12 +2726,14 @@ int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g;
 	struct nvgpu_cdev *cdev;
+	u32 gpu_instance_id;
 
 	cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
 	g = nvgpu_get_gk20a_from_cdev(cdev);
+	gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
-	return gk20a_dbg_gpu_do_dev_open(g, filp, false /* not profiler */);
+	return gk20a_dbg_gpu_do_dev_open(g, filp, gpu_instance_id, false /* not profiler */);
 }
 
 long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
@@ -2707,8 +2744,15 @@
 	struct gk20a *g = dbg_s->g;
 	u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
 	int err = 0;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, dbg_s->gpu_instance_id);
 
-	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
+		"gpu_instance_id [%u] gr_instance_id [%u]",
+		dbg_s->gpu_instance_id, gr_instance_id);
+
+	nvgpu_assert(dbg_s->gpu_instance_id < g->mig.num_gpu_instances);
+	nvgpu_assert(gr_instance_id < g->num_gr_instances);
 
 	if ((_IOC_TYPE(cmd) != NVGPU_DBG_GPU_IOCTL_MAGIC) ||
 	    (_IOC_NR(cmd) == 0) ||
@@ -2768,8 +2812,9 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_SUSPEND_RESUME_ALL_SMS:
-		err = nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
-			(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			nvgpu_dbg_gpu_ioctl_suspend_resume_sm(dbg_s,
+			(struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *)buf));
 		break;
 
 	case NVGPU_DBG_GPU_IOCTL_PERFBUF_MAP:
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index 0ec4a37d3..1d49f1087 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -45,6 +45,10 @@
 #endif
 /** @endcond DOXYGEN_SHOULD_SKIP_THIS */
 
+#include
+#include
+#include
+
 #define NVGPU_PROF_UMD_COPY_WINDOW_SIZE SZ_4K
 
 struct nvgpu_profiler_object_priv {
@@ -83,7 +87,8 @@ struct nvgpu_profiler_object_priv {
 static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv);
 
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
-	enum nvgpu_profiler_pm_reservation_scope scope)
+	enum nvgpu_profiler_pm_reservation_scope scope,
+	u32 gpu_instance_id)
 {
 	struct nvgpu_profiler_object_priv *prof_priv;
 	struct nvgpu_profiler_object *prof;
@@ -98,7 +103,7 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 		return -ENOMEM;
 	}
 
-	err = nvgpu_profiler_alloc(g, &prof, scope);
+	err = nvgpu_profiler_alloc(g, &prof, scope, gpu_instance_id);
 	if (err != 0) {
 		goto free_priv;
 	}
@@ -141,9 +146,11 @@ int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp)
 	struct gk20a *g;
 	int err;
 	struct nvgpu_cdev *cdev;
+	u32 gpu_instance_id;
 
 	cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
 	g = nvgpu_get_gk20a_from_cdev(cdev);
+	gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
 
 	g = nvgpu_get(g);
 	if (!g) {
@@ -157,7 +164,8 @@ int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp)
 	}
 
 	err = nvgpu_prof_fops_open(g, filp,
-		NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE);
+		NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE,
+		gpu_instance_id);
 	if (err != 0) {
 		nvgpu_put(g);
 	}
@@ -170,9 +178,11 @@ int nvgpu_prof_ctx_fops_open(struct inode *inode, struct file *filp)
 	struct gk20a *g;
 	int err;
 	struct nvgpu_cdev *cdev;
+	u32 gpu_instance_id;
 
 	cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
 	g = nvgpu_get_gk20a_from_cdev(cdev);
+	gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
 
 	g = nvgpu_get(g);
 	if (!g) {
@@ -185,7 +195,8 @@ int nvgpu_prof_ctx_fops_open(struct inode *inode, struct file *filp)
 	}
 
 	err = nvgpu_prof_fops_open(g, filp,
-		NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT);
+		NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT,
+		gpu_instance_id);
 	if (err != 0) {
 		nvgpu_put(g);
 	}
@@ -595,6 +606,8 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
 	u32 flags = 0U;
 	bool all_passed = true;
 	int err;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
 
 	nvgpu_log(g, gpu_dbg_prof,
 		"REG_OPS for handle %u: count=%u mode=%u flags=0x%x",
@@ -654,9 +667,10 @@ static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv
 			flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
 		}
 
-		err = g->ops.regops.exec_regops(g, tsg, prof,
-			priv->regops_staging_buf, num_ops,
-			&flags);
+		err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
+			g->ops.regops.exec_regops(g, tsg, prof,
+			priv->regops_staging_buf, num_ops,
+			&flags));
 		if (err) {
 			nvgpu_err(g, "regop execution failed");
 			break;
@@ -756,6 +770,15 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 	struct gk20a *g = prof_priv->g;
 	u8 __maybe_unused buf[NVGPU_PROFILER_IOCTL_MAX_ARG_SIZE];
 	int err = 0;
+	u32 gr_instance_id =
+		nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
+		"gpu_instance_id [%u] gr_instance_id [%u]",
+		prof->gpu_instance_id, gr_instance_id);
+
+	nvgpu_assert(prof->gpu_instance_id < g->mig.num_gpu_instances);
+	nvgpu_assert(gr_instance_id < g->num_gr_instances);
 
 	if ((_IOC_TYPE(cmd) != NVGPU_PROFILER_IOCTL_MAGIC) ||
 	    (_IOC_NR(cmd) == 0) ||
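
Reviewer note: for readers unfamiliar with the statement-expression pattern this patch relies on in gr_instances.h, here is a minimal standalone sketch of the non-MIG fallback it introduces. The point of the change is that the fallback no longer expands to a bare (func): it first asserts that the caller really targets instance 0, then evaluates the wrapped expression and yields its value. All toy_* and demo_* names below are hypothetical stand-ins, not nvgpu APIs; statement expressions are a GNU extension, so GCC or Clang is assumed.

	/*
	 * Toy model of the reworked nvgpu_gr_exec_with_err_for_instance()
	 * for !CONFIG_NVGPU_MIG builds. Illustrative only.
	 * Build: gcc -Wall -o demo demo.c
	 */
	#include <assert.h>
	#include <stdio.h>

	struct toy_gpu {
		unsigned int num_gr_instances;	/* stands in for g->num_gr_instances */
	};

	/*
	 * Validate the instance id, then evaluate the wrapped call and
	 * yield its return value (GCC statement expression).
	 */
	#define toy_exec_with_err_for_instance(g, gr_instance_id, func)	\
		({								\
			assert((gr_instance_id) == 0U);				\
			(void)(g);						\
			(func);							\
		})

	/* Stands in for a HAL op such as g->ops.gr.update_smpc_global_mode(). */
	static int demo_update_mode(struct toy_gpu *g, int enable)
	{
		printf("update mode: enable=%d (num_gr_instances=%u)\n",
		       enable, g->num_gr_instances);
		return 0;
	}

	int main(void)
	{
		struct toy_gpu g = { .num_gr_instances = 1U };

		/* The HAL call is written once; the macro decides the routing. */
		return toy_exec_with_err_for_instance(&g, 0U,
				demo_update_mode(&g, 1));
	}

On MIG-enabled builds the same call sites expand to the instance-routing variant instead, which is why the patch threads gr_instance_id (derived per session via nvgpu_grmgr_get_gr_instance_id()) through update_hwpm_ctxsw_mode() rather than reading the current instance inside the HAL.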