diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9796d7c65..713c77371 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -328,18 +328,9 @@ static int channel_gk20a_setup_userd(struct channel_gk20a *c)
 static void channel_gk20a_bind(struct channel_gk20a *c)
 {
 	struct gk20a *g = c->g;
-	struct fifo_gk20a *f = &g->fifo;
-	u32 engine_id;
-	struct fifo_engine_info_gk20a *engine_info = NULL;
 	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
 		>> ram_in_base_shift_v();
 
-	/* TODO:Need to handle non GR engine channel bind path */
-	engine_id = gk20a_fifo_get_gr_engine_id(g);
-
-	/* Consider 1st available GR engine */
-	engine_info = (f->engine_info + engine_id);
-
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
 		c->hw_chid, inst_ptr);
 
@@ -348,7 +339,7 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
 	gk20a_writel(g, ccsr_channel_r(c->hw_chid),
 		(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
 		 ~ccsr_channel_runlist_f(~0)) |
-		 ccsr_channel_runlist_f(engine_info->runlist_id));
+		 ccsr_channel_runlist_f(c->runlist_id));
 
 	gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
 		ccsr_channel_inst_ptr_f(inst_ptr) |
@@ -401,7 +392,7 @@ void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 
 static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
 {
-	return c->g->ops.fifo.update_runlist(c->g, 0, c->hw_chid, add, true);
+	return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->hw_chid, add, true);
 }
 
 void channel_gk20a_enable(struct channel_gk20a *ch)
@@ -715,7 +706,7 @@ static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
 		break;
 	}
 
-	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+	return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
 }
 
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
@@ -1102,7 +1093,7 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 		void (*update_fn)(struct channel_gk20a *, void *),
 		void *update_fn_data)
 {
-	struct channel_gk20a *ch = gk20a_open_new_channel(g);
+	struct channel_gk20a *ch = gk20a_open_new_channel(g, -1);
 
 	if (ch) {
 		spin_lock(&ch->update_fn_lock);
@@ -1114,11 +1105,16 @@ struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 	return ch;
 }
 
-struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, s32 runlist_id)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	struct channel_gk20a *ch;
 
+	/* compatibility with existing code */
+	if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) {
+		runlist_id = gk20a_fifo_get_gr_runlist_id(g);
+	}
+
 	gk20a_dbg_fn("");
 
 	ch = allocate_channel(f);
@@ -1133,6 +1129,9 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	BUG_ON(ch->g);
 	ch->g = g;
 
+	/* Runlist for the channel */
+	ch->runlist_id = runlist_id;
+
 	if (g->ops.fifo.alloc_inst(g, ch)) {
 		ch->g = NULL;
 		free_channel(f, ch);
@@ -1184,7 +1183,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	return ch;
 }
 
-static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
+/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */
+static int __gk20a_channel_open(struct gk20a *g, struct file *filp, s32 runlist_id)
 {
 	int err;
 	struct channel_gk20a *ch;
@@ -1198,7 +1198,7 @@ static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
 		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
 		return err;
 	}
-	ch = gk20a_open_new_channel(g);
+	ch = gk20a_open_new_channel(g, runlist_id);
 	gk20a_idle(g->dev);
 	if (!ch) {
 		gk20a_err(dev_from_gk20a(g),
@@ -1220,7 +1220,7 @@ int gk20a_channel_open(struct inode *inode, struct file *filp)
 	int ret;
 
 	gk20a_dbg_fn("start");
-	ret = __gk20a_channel_open(g, filp);
+	ret = __gk20a_channel_open(g, filp, -1);
 
 	gk20a_dbg_fn("end");
 	return ret;
@@ -1233,6 +1233,7 @@ int gk20a_channel_open_ioctl(struct gk20a *g,
 	int fd;
 	struct file *file;
 	char *name;
+	s32 runlist_id = args->in.runlist_id;
 
 	err = get_unused_fd_flags(O_RDWR);
 	if (err < 0)
@@ -1253,12 +1254,12 @@ int gk20a_channel_open_ioctl(struct gk20a *g,
 		goto clean_up;
 	}
 
-	err = __gk20a_channel_open(g, file);
+	err = __gk20a_channel_open(g, file, runlist_id);
 	if (err)
 		goto clean_up_file;
 
 	fd_install(fd, file);
-	args->channel_fd = fd;
+	args->out.channel_fd = fd;
 	return 0;
 
 clean_up_file:
@@ -2780,6 +2781,7 @@ int gk20a_channel_suspend(struct gk20a *g)
 	u32 chid;
 	bool channels_in_use = false;
 	int err;
+	u32 active_runlist_ids = 0;
 
 	gk20a_dbg_fn("");
 
@@ -2803,12 +2805,14 @@ int gk20a_channel_suspend(struct gk20a *g)
 
 			channels_in_use = true;
 
+			active_runlist_ids |= BIT(ch->runlist_id);
+
 			gk20a_channel_put(ch);
 		}
 	}
 
 	if (channels_in_use) {
-		g->ops.fifo.update_runlist(g, 0, ~0, false, true);
+		gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true);
 
 		for (chid = 0; chid < f->num_channels; chid++) {
 			if (gk20a_channel_get(&f->channel[chid])) {
@@ -2827,6 +2831,7 @@ int gk20a_channel_resume(struct gk20a *g)
 	struct fifo_gk20a *f = &g->fifo;
 	u32 chid;
 	bool channels_in_use = false;
+	u32 active_runlist_ids = 0;
 
 	gk20a_dbg_fn("");
 
@@ -2835,12 +2840,13 @@ int gk20a_channel_resume(struct gk20a *g)
 			gk20a_dbg_info("resume channel %d", chid);
 			g->ops.fifo.bind_channel(&f->channel[chid]);
 			channels_in_use = true;
+			active_runlist_ids |= BIT(f->channel[chid].runlist_id);
 			gk20a_channel_put(&f->channel[chid]);
 		}
 	}
 
 	if (channels_in_use)
-		g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+		gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true);
 
 	gk20a_dbg_fn("done");
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4e0db3cfb..d85289842 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -198,6 +198,8 @@ struct channel_gk20a {
 	struct work_struct update_fn_work;
 
 	u32 interleave_level;
+
+	u32 runlist_id;
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
@@ -250,7 +252,9 @@ void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller);
 #define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__)
 
 int gk20a_wait_channel_idle(struct channel_gk20a *ch);
-struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g);
+
+/* runlist_id -1 is synonym for ENGINE_GR_GK20A runlist id */
+struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, s32 runlist_id);
 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 		void (*update_fn)(struct channel_gk20a *, void *),
 		void *update_fn_data);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 766ea749c..ab06b4f9c 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -165,6 +165,61 @@ u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g)
 	return reset_mask;
 }
 
+/*
+ * Look up the runlist used by the first available GR engine.
+ *
+ * Returns that engine's runlist_id. When no GR engine is reported, or the
+ * engine info lookup fails, an error is logged and ~0 is returned, so
+ * callers must treat ~0 as "no runlist".
+ */
+u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g)
+{
+	struct fifo_engine_info_gk20a *engine_info;
+	u32 gr_engine_id = FIFO_INVAL_ENGINE_ID;
+
+	/* Consider 1st available GR engine */
+	if (!gk20a_fifo_get_engine_ids(g, &gr_engine_id, 1, ENGINE_GR_GK20A)) {
+		gk20a_err(dev_from_gk20a(g),
+			"No GR engine available on this device!");
+		return ~0;
+	}
+
+	engine_info = gk20a_fifo_get_engine_info(g, gr_engine_id);
+	if (!engine_info) {
+		gk20a_err(g->dev,
+			"gr_engine_id is not in active list/invalid %d", gr_engine_id);
+		return ~0;
+	}
+
+	/* Runlist serving the GR engine */
+	return engine_info->runlist_id;
+}
+
+/*
+ * Report whether runlist_id is served by at least one active engine.
+ * A NULL g returns false silently; an id with no match is logged as error.
+ * NOTE(review): callers passing -1 for "default" presumably log here too.
+ */
+bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
+{
+	struct fifo_engine_info_gk20a *info;
+	struct fifo_gk20a *f;
+	u32 i;
+
+	if (!g)
+		return false;
+
+	f = &g->fifo;
+	for (i = 0; i < f->num_engines; i++) {
+		info = gk20a_fifo_get_engine_info(g, f->active_engines_list[i]);
+		if (info && info->runlist_id == runlist_id)
+			return true;
+	}
+
+	gk20a_err(g->dev, "runlist_id is not in active list/invalid %d", runlist_id);
+	return false;
+}
+
 /*
  * Link engine IDs to MMU IDs and vice versa.
  */
@@ -2736,6 +2791,45 @@ clean_up:
 	return ret;
 }
 
+/*
+ * Update every runlist whose bit is set in runlist_ids (bit N selects
+ * runlist N). hw_chid, add and wait_for_finish are forwarded unchanged to
+ * g->ops.fifo.update_runlist() for each selected runlist.
+ *
+ * All selected runlists are attempted even if one fails. Returns 0 when
+ * every update succeeded, -EINVAL when g is NULL, otherwise the error code
+ * of the last update that failed.
+ */
+int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 hw_chid,
+				bool add, bool wait_for_finish)
+{
+	/*
+	 * for_each_set_bit() walks unsigned long words; copy the mask instead
+	 * of casting &runlist_ids, which reads past the u32 on 64-bit builds.
+	 */
+	unsigned long ids_mask = runlist_ids;
+	unsigned long runlist_id;
+	int ret = 0;
+	int errcode;
+
+	if (!g)
+		return -EINVAL;
+
+	for_each_set_bit(runlist_id, &ids_mask, 32) {
+		errcode = g->ops.fifo.update_runlist(g, (u32)runlist_id, hw_chid,
+						add, wait_for_finish);
+		if (errcode) {
+			/* Keep going; report the last failure to the caller */
+			gk20a_err(dev_from_gk20a(g),
+				"failed to update_runlist %lu %d",
+				runlist_id, errcode);
+			ret = errcode;
+		}
+	}
+
+	return ret;
+}
+
 /* add/remove a channel from runlist
    special cases below: runlist->active_channels will NOT be changed.
    (hw_chid == ~0 && !add) means remove all active channels from runlist.
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 25d2cd9f2..e6ae0bdcd 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -243,4 +243,11 @@ bool gk20a_fifo_is_valid_engine_id(struct gk20a *g, u32 engine_id);
 u32 gk20a_fifo_get_gr_engine_id(struct gk20a *g);
 
 u32 gk20a_fifo_get_all_ce_engine_reset_mask(struct gk20a *g);
+
+u32 gk20a_fifo_get_gr_runlist_id(struct gk20a *g);
+
+bool gk20a_fifo_is_valid_runlist_id(struct gk20a *g, u32 runlist_id);
+
+int gk20a_fifo_update_runlist_ids(struct gk20a *g, u32 runlist_ids, u32 hw_chid,
+		bool add, bool wait_for_finish);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index f7d5535d1..603ed6f5b 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -114,6 +114,16 @@ int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
 
 	ch->tsgid = tsg->tsgid;
 
+	/* all channels bound to a TSG must share the same runlist_id */
+	if (tsg->runlist_id == ~0)
+		tsg->runlist_id = ch->runlist_id;
+	else if (tsg->runlist_id != ch->runlist_id) {
+		gk20a_err(dev_from_gk20a(tsg->g),
+			"Error: TSG channel should be share same runlist ch[%d] tsg[%d]\n",
+			ch->runlist_id, tsg->runlist_id);
+		return -EINVAL;
+	}
+
 	mutex_lock(&tsg->ch_list_lock);
 	list_add_tail(&ch->ch_entry, &tsg->ch_list);
 	mutex_unlock(&tsg->ch_list_lock);
@@ -185,7 +195,7 @@ static int gk20a_tsg_set_priority(struct gk20a *g, struct tsg_gk20a *tsg,
 	gk20a_channel_get_timescale_from_timeslice(g, tsg->timeslice_us,
 			&tsg->timeslice_timeout, &tsg->timeslice_scale);
 
-	g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+	g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 
 	return 0;
 }
@@ -346,7 +356,7 @@ static int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 		break;
 	}
 
-	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+	return ret ? ret : g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
 static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
@@ -360,7 +370,7 @@ static int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice)
 	gk20a_channel_get_timescale_from_timeslice(g, timeslice,
 			&tsg->timeslice_timeout, &tsg->timeslice_scale);
 
-	return g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+	return g->ops.fifo.update_runlist(g, tsg->runlist_id, ~0, true, true);
 }
 
 static void release_used_tsg(struct fifo_gk20a *f, struct tsg_gk20a *tsg)
@@ -411,6 +421,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	tsg->timeslice_us = 0;
 	tsg->timeslice_timeout = 0;
 	tsg->timeslice_scale = 0;
+	tsg->runlist_id = ~0;
 
 	filp->private_data = tsg;
 
@@ -448,6 +459,8 @@ static void gk20a_tsg_release(struct kref *ref)
 
 	release_used_tsg(&g->fifo, tsg);
 
+	tsg->runlist_id = ~0;
+
 	gk20a_dbg(gpu_dbg_fn, "tsg released %d\n", tsg->tsgid);
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 14ead5c06..57414690a 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -54,6 +54,8 @@ struct tsg_gk20a {
 
 	struct list_head event_id_list;
 	struct mutex event_id_list_lock;
+
+	u32 runlist_id;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 9591c72a3..11f389fbc 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -175,6 +175,8 @@ static int init_engine_info(struct fifo_gk20a *f)
 
 	/* FIXME: retrieve this from server */
 	gr_info->runlist_id = 0;
+	f->active_engines_list[0] = gr_sw_id;
+
 	return 0;
 }
 
@@ -281,11 +283,14 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	f->tsg = vzalloc(f->num_channels * sizeof(*f->tsg));
 	f->engine_info = kzalloc(f->max_engines * sizeof(*f->engine_info),
 				GFP_KERNEL);
+	f->active_engines_list = kzalloc(f->max_engines * sizeof(u32),
+				GFP_KERNEL);
 
-	if (!(f->channel && f->tsg && f->engine_info)) {
+	if (!(f->channel && f->tsg && f->engine_info && f->active_engines_list)) {
 		err = -ENOMEM;
 		goto clean_up;
 	}
+	memset(f->active_engines_list, 0xff, (f->max_engines * sizeof(u32)));
 
 	init_engine_info(f);
 
@@ -327,6 +332,8 @@ clean_up:
 	f->tsg = NULL;
 	kfree(f->engine_info);
 	f->engine_info = NULL;
+	kfree(f->active_engines_list);
+	f->active_engines_list = NULL;
 
 	return err;
 }
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 9d6495366..992355d97 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -298,7 +298,20 @@ struct nvgpu_gpu_get_tpc_masks_args {
 };
 
 struct nvgpu_gpu_open_channel_args {
-	__s32 channel_fd;
+	union { /* in and out overlay the same 32-bit storage */
+		__s32 channel_fd; /* deprecated: use out.channel_fd instead */
+		struct {
+			/*
+			 * Input: runlist to open the channel on, i.e. the
+			 * target engine(s) the channel is opened for.
+			 * -1 is a synonym for the primary graphics runlist.
+			 */
+			__s32 runlist_id;
+		} in;
+		struct {
+			__s32 channel_fd; /* output: fd of the opened channel */
+		} out;
+	};
 };
 
 /* L2 cache writeback, optionally invalidate clean lines and flush fb */
@@ -820,7 +833,20 @@ struct nvgpu_get_param_args {
 } __packed;
 
 struct nvgpu_channel_open_args {
-	__s32 channel_fd;
+	union { /* in and out overlay the same 32-bit storage */
+		__s32 channel_fd; /* deprecated: use out.channel_fd instead */
+		struct {
+			/*
+			 * Input: runlist to open the channel on, i.e. the
+			 * target engine(s) the channel is opened for.
+			 * -1 is a synonym for the primary graphics runlist.
+			 */
+			__s32 runlist_id;
+		} in;
+		struct {
+			__s32 channel_fd; /* output: fd of the opened channel */
+		} out;
+	};
 };
 
 struct nvgpu_set_nvmap_fd_args {