diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 2c2850c63..6eecebf59 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -177,7 +177,7 @@ int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
 }
 
 static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
-				u32 timeslice_period, bool interleave)
+				u32 timeslice_period)
 {
 	void *inst_ptr;
 	int shift = 0, value = 0;
@@ -205,30 +205,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	if (c->interleave != interleave) {
-		mutex_lock(&c->g->interleave_lock);
-		c->interleave = interleave;
-		if (interleave)
-			if (c->g->num_interleaved_channels >=
-					MAX_INTERLEAVED_CHANNELS) {
-				gk20a_err(dev_from_gk20a(c->g),
-					"Change of priority would exceed runlist length, only changing timeslice\n");
-				c->interleave = false;
-			} else
-				c->g->num_interleaved_channels += 1;
-		else
-			c->g->num_interleaved_channels -= 1;
-
-		mutex_unlock(&c->g->interleave_lock);
-		gk20a_dbg_info("Set channel %d to interleave %d",
-			c->hw_chid, c->interleave);
-
-		gk20a_fifo_set_channel_priority(
-			c->g, 0, c->hw_chid, c->interleave);
-		c->g->ops.fifo.update_runlist(
-			c->g, 0, ~0, true, false);
-	}
-
 	return 0;
 }
 
@@ -711,6 +687,32 @@ static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
 	return 0;
 }
 
+static int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
+						u32 level)
+{
+	struct gk20a *g = ch->g;
+	int ret;
+
+	if (gk20a_is_channel_marked_as_tsg(ch)) {
+		gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
+		return -EINVAL;
+	}
+
+	switch (level) {
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
+	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
+		ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
+							false, 0, level);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret ? ret : g->ops.fifo.update_runlist(g, 0, ~0, true, true);
+}
+
 static int gk20a_init_error_notifier(struct channel_gk20a *ch,
 		struct nvgpu_set_error_notifier *args)
 {
@@ -899,17 +901,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	}
 	mutex_unlock(&f->deferred_reset_mutex);
 
-	if (ch->interleave) {
-		ch->interleave = false;
-		gk20a_fifo_set_channel_priority(
-			ch->g, 0, ch->hw_chid, ch->interleave);
-
-		mutex_lock(&f->g->interleave_lock);
-		WARN_ON(f->g->num_interleaved_channels == 0);
-		f->g->num_interleaved_channels -= 1;
-		mutex_unlock(&f->g->interleave_lock);
-	}
-
 	if (!ch->bound)
 		goto release;
 
@@ -1154,11 +1145,8 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->interleave = false;
 	ch->clean_up.scheduled = false;
-	gk20a_fifo_set_channel_priority(
-		ch->g, 0, ch->hw_chid, ch->interleave);
-
+	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	/* The channel is *not* runnable at this point. It still needs to have
 	 * an address space bound and allocate a gpfifo and grctx. */
@@ -2613,7 +2601,6 @@ unsigned int gk20a_channel_poll(struct file *filep, poll_table *wait)
 int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 {
 	u32 timeslice_timeout;
-	bool interleave = false;
 
 	if (gk20a_is_channel_marked_as_tsg(ch)) {
 		gk20a_err(dev_from_gk20a(ch->g),
@@ -2630,8 +2617,6 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 		timeslice_timeout = ch->g->timeslice_medium_priority_us;
 		break;
 	case NVGPU_PRIORITY_HIGH:
-		if (ch->g->interleave_high_priority)
-			interleave = true;
 		timeslice_timeout = ch->g->timeslice_high_priority_us;
 		break;
 	default:
@@ -2640,7 +2625,7 @@ int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
 	}
 
 	return channel_gk20a_set_schedule_params(ch,
-			timeslice_timeout, interleave);
+			timeslice_timeout);
 }
 
 static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
@@ -3045,6 +3030,18 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = gk20a_channel_set_wdt_status(ch,
 				(struct nvgpu_channel_wdt_args *)buf);
 		break;
+	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
+		err = gk20a_busy(dev);
+		if (err) {
+			dev_err(&dev->dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = gk20a_channel_set_runlist_interleave(ch,
+			((struct nvgpu_runlist_interleave_args *)buf)->level);
+		gk20a_idle(dev);
+		break;
 	default:
 		dev_dbg(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 4aea9d19a..3f5a657ac 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -188,8 +188,7 @@ struct channel_gk20a {
 	spinlock_t update_fn_lock; /* make access to the two above atomic */
 	struct work_struct update_fn_work;
 
-	/* true if channel is interleaved with lower priority channels */
-	bool interleave;
+	u32 interleave_level;
 };
 
 static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 769960af2..28cc3086a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -303,12 +303,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_tsgs)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = ram_rl_entry_size_v() * f->num_runlist_entries;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -337,9 +331,6 @@ clean_up_runlist_info:
 	kfree(runlist->active_tsgs);
 	runlist->active_tsgs = NULL;
 
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(f->runlist_info);
 	f->runlist_info = NULL;
 
@@ -2162,32 +2153,153 @@ static inline u32 gk20a_get_tsg_runlist_entry_0(struct tsg_gk20a *tsg)
 	return runlist_entry_0;
 }
 
-/* add all active high priority channels */
-static inline u32 gk20a_fifo_runlist_add_high_prio_entries(
-	struct fifo_gk20a *f,
-	struct fifo_runlist_info_gk20a *runlist,
-	u32 *runlist_entry)
+/* recursively construct a runlist with interleaved bare channels and TSGs */
+static u32 *gk20a_runlist_construct_locked(struct fifo_gk20a *f,
+				struct fifo_runlist_info_gk20a *runlist,
+				u32 cur_level,
+				u32 *runlist_entry,
+				bool interleave_enabled,
+				bool prev_empty,
+				u32 *entries_left)
 {
-	struct channel_gk20a *ch = NULL;
-	unsigned long high_prio_chid;
-	u32 count = 0;
+	bool last_level = cur_level == NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH;
+	struct channel_gk20a *ch;
+	bool skip_next = false;
+	u32 chid, tsgid, count = 0;
 
-	for_each_set_bit(high_prio_chid,
-			runlist->high_prio_channels, f->num_channels) {
-		ch = &f->channel[high_prio_chid];
+	gk20a_dbg_fn("");
 
-		if (!gk20a_is_channel_marked_as_tsg(ch) &&
-			test_bit(high_prio_chid, runlist->active_channels) == 1) {
-			gk20a_dbg_info("add high prio channel %lu to runlist",
-				high_prio_chid);
-			runlist_entry[0] = ram_rl_entry_chid_f(high_prio_chid);
+	/* for each bare channel, CH, on this level, insert all higher-level
+	   channels and TSGs before inserting CH. */
+	for_each_set_bit(chid, runlist->active_channels, f->num_channels) {
+		ch = &f->channel[chid];
+
+		if (ch->interleave_level != cur_level)
+			continue;
+
+		if (gk20a_is_channel_marked_as_tsg(ch))
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			/* if interleaving is disabled, higher-level channels
+			   and TSGs only need to be inserted once */
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		gk20a_dbg_info("add channel %d to runlist", chid);
+		runlist_entry[0] = ram_rl_entry_chid_f(chid);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+	}
+
+	/* for each TSG, T, on this level, insert all higher-level channels
+	   and TSGs before inserting T. */
+	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		if (tsg->interleave_level != cur_level)
+			continue;
+
+		if (!last_level && !skip_next) {
+			runlist_entry = gk20a_runlist_construct_locked(f,
+						runlist,
+						cur_level + 1,
+						runlist_entry,
+						interleave_enabled,
+						false,
+						entries_left);
+			if (!interleave_enabled)
+				skip_next = true;
+		}
+
+		if (!(*entries_left))
+			return NULL;
+
+		/* add TSG entry */
+		gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
+		runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
+		runlist_entry[1] = 0;
+		runlist_entry += 2;
+		count++;
+		(*entries_left)--;
+
+		mutex_lock(&tsg->ch_list_lock);
+		/* add runnable channels bound to this TSG */
+		list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
+			if (!test_bit(ch->hw_chid,
+				      runlist->active_channels))
+				continue;
+
+			if (!(*entries_left)) {
+				mutex_unlock(&tsg->ch_list_lock);
+				return NULL;
+			}
+
+			gk20a_dbg_info("add channel %d to runlist",
+				ch->hw_chid);
+			runlist_entry[0] = ram_rl_entry_chid_f(ch->hw_chid);
 			runlist_entry[1] = 0;
 			runlist_entry += 2;
 			count++;
+			(*entries_left)--;
 		}
+		mutex_unlock(&tsg->ch_list_lock);
 	}
 
-	return count;
+	/* append entries from higher level if this level is empty */
+	if (!count && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					true,
+					entries_left);
+
+	/*
+	 * if previous and this level have entries, append
+	 * entries from higher level.
+	 *
+	 * ex. dropping from MEDIUM to LOW, need to insert HIGH
+	 */
+	if (interleave_enabled && count && !prev_empty && !last_level)
+		runlist_entry = gk20a_runlist_construct_locked(f,
+					runlist,
+					cur_level + 1,
+					runlist_entry,
+					interleave_enabled,
+					false,
+					entries_left);
+	return runlist_entry;
+}
+
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level)
+{
+	gk20a_dbg_fn("");
+
+	if (is_tsg)
+		g->fifo.tsg[id].interleave_level = new_level;
+	else
+		g->fifo.channel[id].interleave_level = new_level;
+
+	return 0;
 }
 
 static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
@@ -2198,14 +2310,11 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 *runlist_entry_base = NULL;
-	u32 *runlist_entry = NULL;
 	u64 runlist_iova;
 	u32 old_buf, new_buf;
-	u32 chid, tsgid;
 	struct channel_gk20a *ch = NULL;
 	struct tsg_gk20a *tsg = NULL;
 	u32 count = 0;
-	u32 count_channels_in_tsg;
 	runlist = &f->runlist_info[runlist_id];
 
 	/* valid channel, add/remove it from active list.
@@ -2254,91 +2363,23 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 
 	if (hw_chid != ~0 || /* add/remove a valid channel */
 	    add /* resume to add all channels back */) {
-		runlist_entry = runlist_entry_base;
+		u32 max_entries = f->num_runlist_entries;
+		u32 *runlist_end;
 
-		/* Runlist manipulation:
-		   Insert an entry of all high priority channels inbetween
-		   all lower priority channels. This ensure that the maximum
-		   delay a runnable high priority channel has to wait is one
-		   medium timeslice + any context switching overhead +
-		   wait on other high priority channels.
-		   add non-TSG channels first */
-		for_each_set_bit(chid,
-				runlist->active_channels, f->num_channels) {
-			ch = &f->channel[chid];
-
-			if (!gk20a_is_channel_marked_as_tsg(ch) &&
-					!ch->interleave) {
-				u32 added;
-
-				gk20a_dbg_info("add normal prio channel %d to runlist",
-					chid);
-				runlist_entry[0] = ram_rl_entry_chid_f(chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-
-				added = gk20a_fifo_runlist_add_high_prio_entries(
-						f,
+		runlist_end = gk20a_runlist_construct_locked(f,
 						runlist,
-						runlist_entry);
-				count += added;
-				runlist_entry += 2 * added;
-			}
+						0,
+						runlist_entry_base,
+						g->runlist_interleave,
+						true,
+						&max_entries);
+		if (!runlist_end) {
+			ret = -E2BIG;
+			goto clean_up;
 		}
 
-		/* if there were no lower priority channels, then just
-		 * add the high priority channels once. */
-		if (count == 0) {
-			count = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			runlist_entry += 2 * count;
-		}
-
-		/* now add TSG entries and channels bound to TSG */
-		mutex_lock(&f->tsg_inuse_mutex);
-		for_each_set_bit(tsgid,
-				runlist->active_tsgs, f->num_channels) {
-			u32 added;
-			tsg = &f->tsg[tsgid];
-			/* add TSG entry */
-			gk20a_dbg_info("add TSG %d to runlist", tsg->tsgid);
-			runlist_entry[0] = gk20a_get_tsg_runlist_entry_0(tsg);
-			runlist_entry[1] = 0;
-			runlist_entry += 2;
-			count++;
-
-			/* add runnable channels bound to this TSG */
-			count_channels_in_tsg = 0;
-			mutex_lock(&tsg->ch_list_lock);
-			list_for_each_entry(ch, &tsg->ch_list, ch_entry) {
-				if (!test_bit(ch->hw_chid,
-					runlist->active_channels))
-					continue;
-				gk20a_dbg_info("add channel %d to runlist",
-					ch->hw_chid);
-				runlist_entry[0] =
-					ram_rl_entry_chid_f(ch->hw_chid);
-				runlist_entry[1] = 0;
-				runlist_entry += 2;
-				count++;
-				count_channels_in_tsg++;
-			}
-			mutex_unlock(&tsg->ch_list_lock);
-
-			WARN_ON(tsg->num_active_channels !=
-				count_channels_in_tsg);
-
-			added = gk20a_fifo_runlist_add_high_prio_entries(
-					f,
-					runlist,
-					runlist_entry);
-			count += added;
-			runlist_entry += 2 * added;
-		}
-		mutex_unlock(&f->tsg_inuse_mutex);
+		count = (runlist_end - runlist_entry_base) / 2;
+		WARN_ON(count > f->num_runlist_entries);
 	} else	/* suspend to remove all channels */
 		count = 0;
@@ -2493,42 +2534,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
 	return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
 }
 
-int gk20a_fifo_set_channel_priority(
-	struct gk20a *g,
-	u32 runlist_id,
-	u32 hw_chid,
-	bool interleave)
-{
-	struct fifo_runlist_info_gk20a *runlist = NULL;
-	struct fifo_gk20a *f = &g->fifo;
-	struct channel_gk20a *ch = NULL;
-
-	if (hw_chid >= f->num_channels)
-		return -EINVAL;
-
-	if (runlist_id >= f->max_runlists)
-		return -EINVAL;
-
-	ch = &f->channel[hw_chid];
-
-	gk20a_dbg_fn("");
-
-	runlist = &f->runlist_info[runlist_id];
-
-	mutex_lock(&runlist->mutex);
-
-	if (ch->interleave)
-		set_bit(hw_chid, runlist->high_prio_channels);
-	else
-		clear_bit(hw_chid, runlist->high_prio_channels);
-
-	gk20a_dbg_fn("done");
-
-	mutex_unlock(&runlist->mutex);
-
-	return 0;
-}
-
 struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid)
 {
@@ -2545,4 +2550,5 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gk20a_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index ee4e7328e..0979bf2b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -31,7 +31,6 @@ struct fifo_runlist_info_gk20a {
 	unsigned long *active_channels;
 	unsigned long *active_tsgs;
-	unsigned long *high_prio_channels;
 	/* Each engine has its own SW and HW runlist buffer.*/
 	struct mem_desc mem[MAX_RUNLIST_BUFFERS];
 	u32 cur_buffer;
@@ -184,8 +183,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
 int gk20a_fifo_wait_engine_idle(struct gk20a *g);
 u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
 u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
-int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
-		u32 hw_chid, bool interleave);
 u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
 		int *__id, bool *__is_tsg);
 bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
@@ -198,4 +195,9 @@ struct channel_gk20a *gk20a_fifo_channel_from_hw_chid(struct gk20a *g,
 		u32 hw_chid);
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
 
+int gk20a_fifo_set_runlist_interleave(struct gk20a *g,
+				u32 id,
+				bool is_tsg,
+				u32 runlist_id,
+				u32 new_level);
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index fa2c61e19..0fee58e85 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -672,9 +672,6 @@ static int gk20a_init_support(struct platform_device *dev)
 	mutex_init(&g->ch_wdt_lock);
 	mutex_init(&g->poweroff_lock);
 
-	mutex_init(&g->interleave_lock);
-	g->num_interleaved_channels = 0;
-
 	g->remove_support = gk20a_remove_support;
 	return 0;
 
@@ -1439,14 +1436,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (tegra_platform_is_silicon())
 		gk20a->timeouts_enabled = true;
 
-	gk20a->interleave_high_priority = true;
+	gk20a->runlist_interleave = true;
 
 	gk20a->timeslice_low_priority_us = 1300;
 	gk20a->timeslice_medium_priority_us = 2600;
-	if (gk20a->interleave_high_priority)
-		gk20a->timeslice_high_priority_us = 3000;
-	else
-		gk20a->timeslice_high_priority_us = 5200;
+	gk20a->timeslice_high_priority_us = 5200;
 
 	/* Set up initial power settings. For non-slicon platforms, disable *
 	 * power features and for silicon platforms, read from platform data */
@@ -1527,11 +1521,11 @@ static int gk20a_probe(struct platform_device *dev)
 				platform->debugfs,
 				&gk20a->timeslice_high_priority_us);
 
-	gk20a->debugfs_interleave_high_priority =
-		debugfs_create_bool("interleave_high_priority",
+	gk20a->debugfs_runlist_interleave =
+		debugfs_create_bool("runlist_interleave",
 				S_IRUGO|S_IWUSR,
 				platform->debugfs,
-				&gk20a->interleave_high_priority);
+				&gk20a->runlist_interleave);
 
 	gr_gk20a_debugfs_init(gk20a);
 	gk20a_pmu_debugfs_init(dev);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index afdbeef7e..faccf04a4 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -54,8 +54,6 @@ struct acr_gm20b;
    32 ns is the resolution of ptimer. */
 #define PTIMER_REF_FREQ_HZ 31250000
 
-#define MAX_INTERLEAVED_CHANNELS 32
-
 struct cooling_device_gk20a {
 	struct thermal_cooling_device *gk20a_cooling_dev;
 	unsigned int gk20a_freq_state;
@@ -268,6 +266,9 @@ struct gpu_ops {
 		u32 (*get_num_fifos)(struct gk20a *g);
 		u32 (*get_pbdma_signature)(struct gk20a *g);
 		int (*channel_set_priority)(struct channel_gk20a *ch, u32 priority);
+		int (*set_runlist_interleave)(struct gk20a *g, u32 id,
+					bool is_tsg, u32 runlist_id,
+					u32 new_level);
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -536,10 +537,7 @@ struct gk20a {
 	u32 timeslice_low_priority_us;
 	u32 timeslice_medium_priority_us;
 	u32 timeslice_high_priority_us;
-	u32 interleave_high_priority;
-
-	struct mutex interleave_lock;
-	u32 num_interleaved_channels;
+	u32 runlist_interleave;
 
 	bool slcg_enabled;
 	bool blcg_enabled;
@@ -564,7 +562,7 @@ struct gk20a {
 	struct dentry *debugfs_timeslice_low_priority_us;
 	struct dentry *debugfs_timeslice_medium_priority_us;
 	struct dentry *debugfs_timeslice_high_priority_us;
-	struct dentry *debugfs_interleave_high_priority;
+	struct dentry *debugfs_runlist_interleave;
 #endif
 	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index 4421744cc..b41cca08c 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -228,6 +228,7 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	tsg->tsg_gr_ctx = NULL;
 	tsg->vm = NULL;
+	tsg->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 
 	filp->private_data = tsg;
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index bcc4d0c40..7e0a75d14 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -49,6 +49,8 @@ struct tsg_gk20a {
 	struct gr_ctx_desc *tsg_gr_ctx;
 
 	struct vm_gk20a *vm;
+
+	u32 interleave_level;
 };
 
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index d1deffb93..3fded03c2 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -121,4 +121,5 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.wait_engine_idle = gk20a_fifo_wait_engine_idle;
 	gops->fifo.get_num_fifos = gm20b_fifo_get_num_fifos;
 	gops->fifo.get_pbdma_signature = gk20a_fifo_get_pbdma_signature;
+	gops->fifo.set_runlist_interleave = gk20a_fifo_set_runlist_interleave;
 }
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index e776e97cb..b4bb7f387 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Fifo
  *
- * Copyright (c) 2014-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -194,12 +194,6 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 	if (!runlist->active_channels)
 		goto clean_up_runlist_info;
 
-	runlist->high_prio_channels =
-		kzalloc(DIV_ROUND_UP(f->num_channels, BITS_PER_BYTE),
-			GFP_KERNEL);
-	if (!runlist->high_prio_channels)
-		goto clean_up_runlist_info;
-
 	runlist_size = sizeof(u16) * f->num_channels;
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
 		int err = gk20a_gmmu_alloc(g, runlist_size, &runlist->mem[i]);
@@ -222,9 +216,6 @@ clean_up_runlist:
 		gk20a_gmmu_free(g, &runlist->mem[i]);
 
 clean_up_runlist_info:
-	kfree(runlist->high_prio_channels);
-	runlist->high_prio_channels = NULL;
-
 	kfree(runlist->active_channels);
 	runlist->active_channels = NULL;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 442a84acb..0787d4e42 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -834,6 +834,28 @@ struct nvgpu_channel_wdt_args {
 #define NVGPU_IOCTL_CHANNEL_DISABLE_WDT 1
 #define NVGPU_IOCTL_CHANNEL_ENABLE_WDT 2
 
+/*
+ * Interleaving channels in a runlist is an approach to improve
+ * GPU scheduling by allowing certain channels to appear multiple
+ * times on the runlist. The number of times a channel appears is
+ * governed by the following levels:
+ *
+ * low (L)   : appears once
+ * medium (M): if L, appears L times
+ *             else, appears once
+ * high (H)  : if L, appears (M + 1) x L times
+ *             else if M, appears M times
+ *             else, appears once
+ */
+struct nvgpu_runlist_interleave_args {
+	__u32 level;
+	__u32 reserved;
+};
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW	0
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM	1
+#define NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH	2
+#define NVGPU_RUNLIST_INTERLEAVE_NUM_LEVELS	3
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -876,9 +898,11 @@ struct nvgpu_channel_wdt_args {
 	_IOWR(NVGPU_IOCTL_MAGIC, 118, struct nvgpu_cycle_stats_snapshot_args)
 #define NVGPU_IOCTL_CHANNEL_WDT \
 	_IOW(NVGPU_IOCTL_MAGIC, 119, struct nvgpu_channel_wdt_args)
+#define NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE \
+	_IOW(NVGPU_IOCTL_MAGIC, 120, struct nvgpu_runlist_interleave_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST \
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_WDT)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE)
 
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_submit_gpfifo_args)
 
 /*
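
As a worked check of the level rules documented in the uapi comment above:
with interleaving enabled, two active bare LOW channels (l0, l1), one MEDIUM
channel (m0) and one HIGH channel (h0), tracing gk20a_runlist_construct_locked()
from the LOW level yields the entry order

    h0 m0 h0 l0 h0 m0 h0 l1

so h0 appears (M + 1) x L = (1 + 1) x 2 = 4 times and m0 appears L = 2 times,
matching the comment. The channel names are illustrative only; the pattern
follows from the recursion (each bare channel or TSG on a level first inserts
all higher-level entries, and a non-empty level appends the higher levels once
more before returning).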
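From user space, the new ioctl is driven roughly as in the sketch below. This
is a minimal illustration, not part of the patch: it assumes ch_fd is an
already-open nvgpu channel file descriptor (how the channel is opened is
platform-specific and not shown) and that the updated <linux/nvgpu.h> is on
the include path; every other identifier comes from the uapi hunk above.

/* Hedged usage sketch: set a channel's runlist interleave level.
 * ch_fd is assumed to be an open nvgpu channel fd. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nvgpu.h>

static int set_runlist_interleave(int ch_fd, __u32 level)
{
	struct nvgpu_runlist_interleave_args args;

	memset(&args, 0, sizeof(args));	/* zero the reserved field */
	args.level = level;		/* LOW, MEDIUM or HIGH */

	if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE, &args) < 0) {
		perror("NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE");
		return -1;
	}
	return 0;
}

/* e.g. set_runlist_interleave(ch_fd, NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH); */

Per the kernel side above, the call fails with EINVAL for TSG-bound channels
or out-of-range levels, and a successful call immediately rewrites the runlist
via g->ops.fifo.update_runlist().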