diff --git a/drivers/gpu/nvgpu/common/regops/regops.c b/drivers/gpu/nvgpu/common/regops/regops.c
index e4d74da2f..feb4b6067 100644
--- a/drivers/gpu/nvgpu/common/regops/regops.c
+++ b/drivers/gpu/nvgpu/common/regops/regops.c
@@ -122,6 +122,15 @@ int exec_regops_gk20a(struct gk20a *g,
 			continue;
 		}
 
+		/*
+		 * Move to the next op if the current op is invalid.
+		 * Execution reaches here only when CONTINUE_ON_ERROR
+		 * mode is requested.
+		 */
+		if (ops[i].status != REGOP(STATUS_SUCCESS)) {
+			continue;
+		}
+
 		switch (ops[i].op) {
 		case REGOP(READ_32):
diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
index 965bd6f81..ed4111116 100644
--- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gk20a.c
@@ -1400,6 +1400,16 @@ static int gr_exec_ctx_ops(struct nvgpu_tsg *tsg,
 		if (ctx_op_nr >= num_ctx_ops[pass]) {
 			break;
 		}
+
+		/*
+		 * Move to the next op if the current op is invalid.
+		 * Execution reaches here only when CONTINUE_ON_ERROR
+		 * mode is requested.
+		 */
+		if (ctx_ops[i].status != REGOP(STATUS_SUCCESS)) {
+			continue;
+		}
+
 		/* only do ctx ops and only on the right pass */
 		if ((ctx_ops[i].type == REGOP(TYPE_GLOBAL)) ||
 		    (((pass == 0) && reg_op_is_read(ctx_ops[i].op)) ||
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 04503267d..9d4e8d560 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -579,7 +579,7 @@ static int dbg_unbind_all_channels_gk20a(struct dbg_session_gk20a *dbg_s)
  * Convert common regops op values of the form of NVGPU_DBG_REG_OP_*
  * into linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
  */
-static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
+u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
 {
 	switch (regops_op) {
 	case REGOP(READ_32):
@@ -603,7 +603,7 @@ static u32 nvgpu_get_regops_op_values_linux(u32 regops_op)
  * Convert linux regops op values of the form of NVGPU_DBG_GPU_REG_OP_*
  * into common regops op values of the form of NVGPU_DBG_REG_OP_*
  */
-static u32 nvgpu_get_regops_op_values_common(u32 regops_op)
+u32 nvgpu_get_regops_op_values_common(u32 regops_op)
 {
 	switch (regops_op) {
 	case REGOP_LINUX(READ_32):
@@ -679,7 +679,7 @@ static u32 nvgpu_get_regops_type_values_common(u32 regops_type)
  * Convert common regops status values of the form of NVGPU_DBG_REG_OP_STATUS_*
  * into linux regops type values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
  */
-static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
+u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
 {
 	switch (regops_status) {
 	case REGOP(STATUS_SUCCESS):
@@ -703,7 +703,7 @@ static u32 nvgpu_get_regops_status_values_linux(u32 regops_status)
  * Convert linux regops status values of the form of NVGPU_DBG_GPU_REG_OP_STATUS_*
  * into common regops type values of the form of NVGPU_DBG_REG_OP_STATUS_*
  */
-static u32 nvgpu_get_regops_status_values_common(u32 regops_status)
+u32 nvgpu_get_regops_status_values_common(u32 regops_status)
 {
 	switch (regops_status) {
 	case REGOP_LINUX(STATUS_SUCCESS):
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
index 2e188cc04..bfa088eb4 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
@@ -35,4 +35,9 @@ unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
 /* used by profiler driver interface */
 int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
 
+u32 nvgpu_get_regops_op_values_common(u32 regops_op);
+u32 nvgpu_get_regops_status_values_common(u32 regops_status);
+u32 nvgpu_get_regops_op_values_linux(u32 regops_op);
+u32 nvgpu_get_regops_status_values_linux(u32 regops_status);
+
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
index e2f3841f4..d4fd21723 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_prof.c
@@ -25,16 +25,42 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
 #include "os_linux.h"
 #include "ioctl_prof.h"
+#include "ioctl_dbg.h"
 #include "ioctl_tsg.h"
 
+#define NVGPU_PROF_UMD_COPY_WINDOW_SIZE		SZ_4K
+
 struct nvgpu_profiler_object_priv {
 	struct nvgpu_profiler_object *prof;
 	struct gk20a *g;
+
+	/*
+	 * Staging buffer to hold regops copied from userspace.
+	 * Regops are stored in struct nvgpu_profiler_reg_op format. This
+	 * struct is added for the new profiler design and is a trimmed-down
+	 * version of the legacy regop struct nvgpu_dbg_reg_op.
+	 *
+	 * Struct nvgpu_profiler_reg_op is an OS-specific struct and cannot
+	 * be used in common nvgpu code.
+	 */
+	struct nvgpu_profiler_reg_op *regops_umd_copy_buf;
+
+	/*
+	 * Staging buffer to execute regops in common code.
+	 * Regops are stored in struct nvgpu_dbg_reg_op which is defined
+	 * in common code.
+	 *
+	 * Regops in struct nvgpu_profiler_reg_op should first be converted
+	 * to this format, and this buffer should be passed for regops
+	 * execution.
+	 */
+	struct nvgpu_dbg_reg_op *regops_staging_buf;
 };
 
 static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
@@ -42,6 +68,7 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 {
 	struct nvgpu_profiler_object_priv *prof_priv;
 	struct nvgpu_profiler_object *prof;
+	u32 num_regops;
 	int err;
 
 	nvgpu_log(g, gpu_dbg_prof, "Request to open profiler session with scope %u",
@@ -54,19 +81,40 @@ static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
 
 	err = nvgpu_profiler_alloc(g, &prof, scope);
 	if (err != 0) {
-		nvgpu_kfree(g, prof_priv);
-		return -ENOMEM;
+		goto free_priv;
 	}
 
 	prof_priv->g = g;
 	prof_priv->prof = prof;
 	filp->private_data = prof_priv;
 
+	prof_priv->regops_umd_copy_buf = nvgpu_kzalloc(g,
+			NVGPU_PROF_UMD_COPY_WINDOW_SIZE);
+	if (prof_priv->regops_umd_copy_buf == NULL) {
+		goto free_prof;
+	}
+
+	num_regops = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
+			sizeof(prof_priv->regops_umd_copy_buf[0]);
+	prof_priv->regops_staging_buf = nvgpu_kzalloc(g,
+			num_regops * sizeof(prof_priv->regops_staging_buf[0]));
+	if (prof_priv->regops_staging_buf == NULL) {
+		goto free_umd_buf;
+	}
+
 	nvgpu_log(g, gpu_dbg_prof, "Profiler session with scope %u created successfully with profiler handle %u",
 		scope, prof->prof_handle);
 
 	return 0;
+
+free_umd_buf:
+	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
+free_prof:
+	nvgpu_profiler_free(prof);
+free_priv:
+	nvgpu_kfree(g, prof_priv);
+	return err;
 }
 
 int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp)
@@ -132,6 +180,10 @@ int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
 		prof->scope, prof->prof_handle);
 
 	nvgpu_profiler_free(prof);
+
+	nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
+	nvgpu_kfree(g, prof_priv->regops_staging_buf);
+
 	nvgpu_kfree(g, prof_priv);
 	nvgpu_put(g);
 
@@ -283,6 +335,168 @@ static int nvgpu_prof_ioctl_unbind_pm_resources(struct nvgpu_profiler_object *pr
 	return nvgpu_profiler_unbind_pm_resources(prof);
 }
 
+static void nvgpu_prof_get_regops_staging_data(struct nvgpu_profiler_object *prof,
+		struct nvgpu_profiler_reg_op *in,
+		struct nvgpu_dbg_reg_op *out, u32 num_ops)
+{
+	u32 i;
+	u8 reg_op_type = 0U;
+
+	switch (prof->scope) {
+	case NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE:
+		if (prof->tsg != NULL) {
+			reg_op_type = NVGPU_DBG_REG_OP_TYPE_GR_CTX;
+		} else {
+			reg_op_type = NVGPU_DBG_REG_OP_TYPE_GLOBAL;
+		}
+		break;
+	case NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT:
+		reg_op_type = NVGPU_DBG_REG_OP_TYPE_GR_CTX;
+		break;
+	}
+
+	for (i = 0; i < num_ops; i++) {
+		out[i].op = nvgpu_get_regops_op_values_common(in[i].op);
+		out[i].type = reg_op_type;
+		out[i].status = nvgpu_get_regops_status_values_common(in[i].status);
+		out[i].quad = 0U;
+		out[i].group_mask = 0U;
+		out[i].sub_group_mask = 0U;
+		out[i].offset = in[i].offset;
+		out[i].value_lo = u64_lo32(in[i].value);
+		out[i].value_hi = u64_hi32(in[i].value);
+		out[i].and_n_mask_lo = u64_lo32(in[i].and_n_mask);
+		out[i].and_n_mask_hi = u64_hi32(in[i].and_n_mask);
+	}
+}
+
+static void nvgpu_prof_get_regops_linux_data(struct nvgpu_dbg_reg_op *in,
+		struct nvgpu_profiler_reg_op *out, u32 num_ops)
+{
+	u32 i;
+
+	for (i = 0; i < num_ops; i++) {
+		out[i].op = nvgpu_get_regops_op_values_linux(in[i].op);
+		out[i].status = nvgpu_get_regops_status_values_linux(in[i].status);
+		out[i].offset = in[i].offset;
+		out[i].value = hi32_lo32_to_u64(in[i].value_hi, in[i].value_lo);
+		out[i].and_n_mask = hi32_lo32_to_u64(in[i].and_n_mask_hi, in[i].and_n_mask_lo);
+	}
+}
+
+static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv,
+		struct nvgpu_profiler_exec_reg_ops_args *args)
+{
+	struct nvgpu_profiler_object *prof = priv->prof;
+	struct gk20a *g = prof->g;
+	struct nvgpu_tsg *tsg = prof->tsg;
+	u32 num_regops_in_copy_buf = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
+			sizeof(priv->regops_umd_copy_buf[0]);
+	u32 ops_offset = 0;
+	u32 flags = 0U;
+	bool all_passed = true;
+	int err;
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"REG_OPS for handle %u: count=%u mode=%u flags=0x%x",
+		prof->prof_handle, args->count, args->mode, args->flags);
+
+	if (args->count == 0) {
+		return -EINVAL;
+	}
+
+	if (args->count > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) {
+		nvgpu_err(g, "regops limit exceeded");
+		return -EINVAL;
+	}
+
+	if (!prof->bound) {
+		nvgpu_err(g, "PM resources are not bound to profiler");
+		return -EINVAL;
+	}
+
+	if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+		flags |= NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR;
+	} else {
+		flags |= NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
+	}
+
+	while (ops_offset < args->count) {
+		const u32 num_ops =
+			min(args->count - ops_offset, num_regops_in_copy_buf);
+		const u64 fragment_size =
+			num_ops * sizeof(priv->regops_umd_copy_buf[0]);
+		void __user *const user_fragment =
+			(void __user *)(uintptr_t)
+			(args->ops +
+			 ops_offset * sizeof(priv->regops_umd_copy_buf[0]));
+
+		nvgpu_log(g, gpu_dbg_prof, "Regops fragment: start_op=%u ops=%u",
+			ops_offset, num_ops);
+
+		if (copy_from_user(priv->regops_umd_copy_buf,
+				user_fragment, fragment_size)) {
+			nvgpu_err(g, "copy_from_user failed!");
+			err = -EFAULT;
+			break;
+		}
+
+		nvgpu_prof_get_regops_staging_data(prof,
+			priv->regops_umd_copy_buf,
+			priv->regops_staging_buf, num_ops);
+
+		if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+			flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
+		}
+
+		err = g->ops.regops.exec_regops(g, tsg,
+			priv->regops_staging_buf, num_ops,
+			&flags);
+		if (err) {
+			nvgpu_err(g, "regop execution failed");
+			break;
+		}
+
+		if (ops_offset == 0) {
+			if (flags & NVGPU_REG_OP_FLAG_DIRECT_OPS) {
+				args->flags |=
+					NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_DIRECT_OPS;
+			}
+		}
+
+		if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
+			if ((flags & NVGPU_REG_OP_FLAG_ALL_PASSED) == 0) {
+				all_passed = false;
+			}
+		}
+
+		nvgpu_prof_get_regops_linux_data(
+			priv->regops_staging_buf,
+			priv->regops_umd_copy_buf, num_ops);
+
+		if (copy_to_user(user_fragment,
+				priv->regops_umd_copy_buf,
+				fragment_size)) {
+			nvgpu_err(g, "copy_to_user failed!");
+			err = -EFAULT;
+			break;
+		}
+
+		ops_offset += num_ops;
+	}
+
+	if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR
+			&& all_passed && (err == 0)) {
+		args->flags |= NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED;
+	}
+
+	nvgpu_log(g, gpu_dbg_prof,
+		"REG_OPS for handle %u complete: count=%u mode=%u flags=0x%x err=%d",
+		prof->prof_handle, args->count, args->mode, args->flags, err);
+
+	return err;
+}
+
 long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 	unsigned long arg)
 {
@@ -341,6 +555,11 @@ long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
 		err = nvgpu_prof_ioctl_unbind_pm_resources(prof);
 		break;
 
+	case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS:
+		err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv,
+			(struct nvgpu_profiler_exec_reg_ops_args *)buf);
+		break;
+
 	default:
 		nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 508090e9f..33850bb53 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1577,14 +1577,31 @@ struct nvgpu_profiler_pma_stream_update_get_put_args {
 	__u32 reserved[3];
 };
 
-enum {
-	NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_ALL_OR_NONE,
-	NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR,
-};
+/*
+ * MODE_ALL_OR_NONE
+ * Reg_ops execution bails out if any of the reg_ops is not valid
+ * or if there is any other error, such as a failure to access the context image.
+ * Subsequent reg_ops will not be executed and nvgpu_profiler_reg_op.status
+ * will not be populated for them.
+ * The IOCTL returns an error in all of these cases.
+ */
+#define NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_ALL_OR_NONE	0U
+/*
+ * MODE_CONTINUE_ON_ERROR
+ * This mode allows reg_ops execution to continue even if some of the
+ * reg_ops are not valid. Invalid reg_ops will be skipped and valid
+ * ones will be executed.
+ * The IOCTL returns an error only if there is some other severe failure,
+ * such as a failure to access the context image.
+ * If any reg_op is invalid, or did not pass, this is
+ * reported via the NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED flag.
+ * The IOCTL returns success in such cases.
+ */
+#define NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR	1U
 
 struct nvgpu_profiler_reg_op {
-	__u8 op;
-	__u8 status;
+	__u8 op;	/* Operation in the form NVGPU_DBG_GPU_REG_OP_READ/WRITE_* */
+	__u8 status;	/* Status in the form NVGPU_DBG_GPU_REG_OP_STATUS_* */
 	__u32 offset;
 	__u64 value;
 	__u64 and_n_mask;
@@ -1593,7 +1610,10 @@ struct nvgpu_profiler_reg_op {
 
 struct nvgpu_profiler_exec_reg_ops_args {
 	__u32 mode;	/* in: operation mode NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_* */
-	__u32 count;	/* in: number of reg_ops operations */
+	__u32 count;	/* in: number of reg_ops operations,
+			 * upper limit nvgpu_gpu_characteristics.reg_ops_limit
+			 */
+	__u64 ops;	/* in/out: pointer to actual operations nvgpu_profiler_reg_op */
 	/* out: if all reg_ops passed, valid only for MODE_CONTINUE_ON_ERROR */
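
For illustration, a minimal user-space sketch (not part of the patch) of how a UMD might drive the NVGPU_PROFILER_IOCTL_EXEC_REG_OPS ioctl added above, using CONTINUE_ON_ERROR mode and the ALL_PASSED output flag. The <linux/nvgpu.h> include path, the NVGPU_DBG_GPU_REG_OP_READ_32 constant, and the read_pm_regs() helper name are assumptions based on the existing dbg/prof uapi; the profiler fd is assumed to be already opened and bound to PM resources, and error handling is trimmed.

/*
 * Illustrative sketch only: issue a batch of 32-bit register reads
 * through NVGPU_PROFILER_IOCTL_EXEC_REG_OPS on an already-opened,
 * already-bound profiler object fd. Header path and the READ_32 op
 * constant are assumed, not taken from this patch.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

#define SKETCH_MAX_OPS 16U	/* arbitrary limit for this example */

static int read_pm_regs(int prof_fd, const uint32_t *offsets,
			uint64_t *values, uint32_t count)
{
	struct nvgpu_profiler_reg_op ops[SKETCH_MAX_OPS] = {0};
	struct nvgpu_profiler_exec_reg_ops_args args = {0};
	uint32_t i;

	if (count > SKETCH_MAX_OPS)
		return -1;

	/* Build one read op per requested offset. */
	for (i = 0; i < count; i++) {
		ops[i].op = NVGPU_DBG_GPU_REG_OP_READ_32;
		ops[i].offset = offsets[i];
	}

	/* Skip invalid ops instead of failing the whole batch. */
	args.mode = NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR;
	args.count = count;
	args.ops = (uint64_t)(uintptr_t)ops;

	if (ioctl(prof_fd, NVGPU_PROFILER_IOCTL_EXEC_REG_OPS, &args) != 0)
		return -1;

	/*
	 * In CONTINUE_ON_ERROR mode the ioctl succeeds even if some ops
	 * were skipped; ALL_PASSED reports whether every op went through.
	 */
	if ((args.flags & NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED) == 0)
		return -1;

	for (i = 0; i < count; i++)
		values[i] = ops[i].value;

	return 0;
}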