drm/tegra: Merge upstream changes

Merge upstream changes from linux-next, including the merged version of the new UAPI.

Change-Id: I4f591d39e51ac6ab6877a0bd428adf166eca3c55
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvidia/+/2653095
Tested-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: Jonathan Hunter <jonathanh@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
GVS: Gerrit_Virtual_Submit
2025-12-22 09:11:26 +03:00 · 2021-09-02 15:05:23 +03:00
parent a6ff2bcf9e
commit 02b028d02a
30 changed files with 1846 additions and 1034 deletions
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -9,10 +9,9 @@ ccflags-y += -I$(srctree.host1x)/include

 tegra-drm-next-y := \
 	drm.o \
-	uapi/uapi.o \
-	uapi/submit.o \
-	uapi/firewall.o \
-	uapi/gather_bo.o \
+	uapi.o \
+	submit.o \
+	firewall.o \
 	gem.o \
 	fb.o \
 	dp.o \
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -8,6 +8,7 @@
 #include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/iommu.h>
+#include <linux/interconnect.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
@@ -625,9 +626,14 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
 	int err;

+	plane_state->peak_memory_bandwidth = 0;
+	plane_state->avg_memory_bandwidth = 0;
+
 	/* no need for further checks if the plane is being disabled */
-	if (!new_plane_state->crtc)
+	if (!new_plane_state->crtc) {
+		plane_state->total_peak_memory_bandwidth = 0;
 		return 0;
+	}

 	err = tegra_plane_format(new_plane_state->fb->format->format,
 				 &plane_state->format,
@@ -830,6 +836,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	formats = dc->soc->primary_formats;
 	modifiers = dc->soc->modifiers;

+	err = tegra_plane_interconnect_init(plane);
+	if (err) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
 				       &tegra_plane_funcs, formats,
 				       num_formats, modifiers, type, NULL);
@@ -872,12 +884,18 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
 				     struct drm_plane_state *new_plane_state)
 {
 #endif
+	struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	int err;

+	plane_state->peak_memory_bandwidth = 0;
+	plane_state->avg_memory_bandwidth = 0;
+
 	/* no need for further checks if the plane is being disabled */
-	if (!new_plane_state->crtc)
+	if (!new_plane_state->crtc) {
+		plane_state->total_peak_memory_bandwidth = 0;
 		return 0;
+	}

 	/* scaling not supported for cursor */
 	if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) ||
@@ -1165,6 +1183,12 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 	if (!dc->soc->has_nvdisplay) {
 		num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats);
 		formats = tegra_legacy_cursor_plane_formats;
+
+		err = tegra_plane_interconnect_init(plane);
+		if (err) {
+			kfree(plane);
+			return ERR_PTR(err);
+		}
 	} else {
 		num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
 		formats = tegra_cursor_plane_formats;
@@ -1288,6 +1312,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	num_formats = dc->soc->num_overlay_formats;
 	formats = dc->soc->overlay_formats;

+	err = tegra_plane_interconnect_init(plane);
+	if (err) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
 	if (!cursor)
 		type = DRM_PLANE_TYPE_OVERLAY;
 	else
@@ -1715,6 +1745,11 @@ static int tegra_dc_show_stats(struct seq_file *s, void *data)
 	seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
 	seq_printf(s, "overflow: %lu\n", dc->stats.overflow);

+	seq_printf(s, "frames total: %lu\n", dc->stats.frames_total);
+	seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total);
+	seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total);
+	seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total);
+
 	return 0;
 }

@@ -1947,6 +1982,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout)
 	return -ETIMEDOUT;
 }

+/*
+ * Push the bandwidth values stored in the plane states to the interconnect
+ * paths (icc_mem and icc_mem_vfilter) of the planes that belong to @crtc.
+ *
+ * @crtc: CRTC whose planes are updated
+ * @state: atomic state; supplies the old CRTC state and old plane states
+ * @prepare_bandwidth_transition: true when called ahead of the hardware
+ *	update (atomic_begin), false when called once the commit is done
+ *	(post_commit)
+ *
+ * Nothing is done on SoCs that have nv-display.
+ */
+static void
+tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state,
+				   bool prepare_bandwidth_transition)
+{
+	const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
+	u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw;
+	const struct drm_plane_state *old_plane_state;
+	const struct drm_crtc_state *old_crtc_state;
+	struct tegra_dc_window window, old_window;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_plane *tegra;
+	struct drm_plane *plane;
+
+	if (dc->soc->has_nvdisplay)
+		return;
+
+	old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+	if (!crtc->state->active) {
+		/* CRTC was already inactive, there is nothing to remove */
+		if (!old_crtc_state->active)
+			return;
+
+		/*
+		 * When CRTC is disabled on DPMS, the state of attached planes
+		 * is kept unchanged. Hence we need to enforce removal of the
+		 * bandwidths from the ICC paths.
+		 */
+		drm_atomic_crtc_for_each_plane(plane, crtc) {
+			tegra = to_tegra_plane(plane);
+
+			icc_set_bw(tegra->icc_mem, 0, 0);
+			icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+		}
+
+		return;
+	}
+
+	for_each_old_plane_in_state(old_crtc_state->state, plane,
+				    old_plane_state, i) {
+		old_tegra_state = to_const_tegra_plane_state(old_plane_state);
+		new_tegra_state = to_const_tegra_plane_state(plane->state);
+		tegra = to_tegra_plane(plane);
+
+		/*
+		 * We're iterating over the global atomic state and it contains
+		 * planes from another CRTC, hence we need to filter out the
+		 * planes unrelated to this CRTC.
+		 */
+		if (tegra->dc != dc)
+			continue;
+
+		new_avg_bw = new_tegra_state->avg_memory_bandwidth;
+		old_avg_bw = old_tegra_state->avg_memory_bandwidth;
+
+		new_peak_bw = new_tegra_state->total_peak_memory_bandwidth;
+		old_peak_bw = old_tegra_state->total_peak_memory_bandwidth;
+
+		/*
+		 * See the comment related to !crtc->state->active above,
+		 * which explains why bandwidths need to be updated when
+		 * CRTC is turning ON.
+		 */
+		if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
+		    old_crtc_state->active)
+			continue;
+
+		/*
+		 * Only the source and destination heights are filled in.
+		 * NOTE(review): presumably that is all that
+		 * tegra_plane_use_vertical_filtering() reads - confirm.
+		 */
+		window.src.h = drm_rect_height(&plane->state->src) >> 16;
+		window.dst.h = drm_rect_height(&plane->state->dst);
+
+		old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
+		old_window.dst.h = drm_rect_height(&old_plane_state->dst);
+
+		/*
+		 * During the preparation phase (atomic_begin), the memory
+		 * freq should go high before the DC changes are committed
+		 * if bandwidth requirement goes up, otherwise memory freq
+		 * should stay high if BW requirement goes down.  The
+		 * opposite applies to the completion phase (post_commit).
+		 */
+		if (prepare_bandwidth_transition) {
+			new_avg_bw = max(old_avg_bw, new_avg_bw);
+			new_peak_bw = max(old_peak_bw, new_peak_bw);
+
+			if (tegra_plane_use_vertical_filtering(tegra, &old_window))
+				window = old_window;
+		}
+
+		icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
+
+		if (tegra_plane_use_vertical_filtering(tegra, &window))
+			icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw);
+		else
+			icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+	}
+}
+
 static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
 				      struct drm_atomic_state *state)
@@ -2140,6 +2275,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
 {
 	unsigned long flags;

+	tegra_crtc_update_memory_bandwidth(crtc, state, true);
+
 	if (crtc->state->event) {
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);

@@ -2180,7 +2317,207 @@ static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
 	value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 }

+/*
+ * Tell whether the plane described by @state is treated as a cursor.
+ *
+ * A plane that is not of type DRM_PLANE_TYPE_CURSOR never is. A cursor-type
+ * plane always is when the SoC sets supports_cursor; otherwise it only counts
+ * when source and destination widths match, the format has a single plane
+ * and one source row (width * cpp[0]) is at most 256 bytes.
+ *
+ * @state->crtc and @state->fb are dereferenced unconditionally, so both have
+ * to be set by the caller.
+ */
+static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
+{
+	const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
+	const struct drm_format_info *format = state->fb->format;
+	unsigned int src_width = drm_rect_width(&state->src) >> 16;
+	unsigned int dst_width = drm_rect_width(&state->dst);
+
+	if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
+		return false;
+
+	if (soc->supports_cursor)
+		return true;
+
+	return src_width == dst_width && format->num_planes == 1 &&
+	       src_width * format->cpp[0] <= 256;
+}
+
+/*
+ * Build a bitmask of the plane indices (tegra_plane::index) whose destination
+ * rectangle intersects the destination rectangle of @plane_state.
+ *
+ * @state: CRTC state whose planes are walked
+ * @plane_state: state of the plane the overlaps are computed for
+ *
+ * Returns 0 when @plane_state is invisible, has no framebuffer or is a
+ * cursor; otherwise the mask of overlapping, visible, non-cursor planes.
+ */
+static unsigned long
+tegra_plane_overlap_mask(struct drm_crtc_state *state,
+			 const struct drm_plane_state *plane_state)
+{
+	const struct drm_plane_state *other;
+	unsigned long mask = 0;
+	struct drm_plane *plane;
+
+	if (!plane_state->visible || !plane_state->fb)
+		return 0;
+
+	/*
+	 * Data-prefetch FIFO will easily help to overcome temporal memory
+	 * pressure if other plane overlaps with the cursor plane.
+	 */
+	if (tegra_plane_is_cursor(plane_state))
+		return 0;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, other, state) {
+		/*
+		 * Start from a fresh copy on every iteration.
+		 * NOTE(review): drm_rect_intersect() is expected to overwrite
+		 * its first argument - confirm against the DRM rect helpers.
+		 */
+		struct drm_rect clip = plane_state->dst;
+
+		/*
+		 * Skip planes that fetch nothing. Cursor plane overlaps are
+		 * ignored as well because it's not practical to assume that
+		 * a cursor contributes to the bandwidth in the overlapping
+		 * area if the window width is small.
+		 */
+		if (!other->visible || !other->fb ||
+		    tegra_plane_is_cursor(other))
+			continue;
+
+		if (drm_rect_intersect(&clip, &other->dst))
+			mask |= BIT(to_tegra_plane(other->plane)->index);
+	}
+
+	return mask;
+}
+
+/*
+ * Compute total_peak_memory_bandwidth for every plane of @crtc, taking the
+ * bandwidth of overlapping planes into account.
+ *
+ * @crtc: CRTC being checked
+ * @state: atomic state under check; planes whose total peak bandwidth
+ *	changes are added to it via drm_atomic_get_plane_state()
+ *
+ * Returns 0 on success (and immediately on nv-display SoCs), -EINVAL if a
+ * plane index is not below TEGRA_DC_LEGACY_PLANES_NUM, or the error reported
+ * by drm_atomic_get_plane_state().
+ */
+static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
+						 struct drm_atomic_state *state)
+{
+	ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
+	u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
+	bool all_planes_overlap_simultaneously = true;
+	const struct tegra_plane_state *tegra_state;
+	const struct drm_plane_state *plane_state;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_crtc_state *new_state;
+	struct tegra_plane *tegra;
+	struct drm_plane *plane;
+
+	/*
+	 * The nv-display uses shared planes.  The algorithm below assumes
+	 * maximum 3 planes per-CRTC, this assumption isn't applicable to
+	 * the nv-display.  Note that T124 support has additional windows,
+	 * but currently they aren't supported by the driver.
+	 */
+	if (dc->soc->has_nvdisplay)
+		return 0;
+
+	new_state = drm_atomic_get_new_crtc_state(state, crtc);
+
+	/*
+	 * For overlapping planes pixel's data is fetched for each plane at
+	 * the same time, hence bandwidths are accumulated in this case.
+	 * This needs to be taken into account for calculating total bandwidth
+	 * consumed by all planes.
+	 *
+	 * Here we get the overlapping state of each plane, which is a
+	 * bitmask of plane indices telling with what planes there is an
+	 * overlap. Note that bitmask[plane] includes BIT(plane) in order
+	 * to make further code nicer and simpler.
+	 */
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+		tegra_state = to_const_tegra_plane_state(plane_state);
+		tegra = to_tegra_plane(plane);
+
+		/* the per-plane arrays are indexed by tegra->index */
+		if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
+			return -EINVAL;
+
+		plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth;
+		mask = tegra_plane_overlap_mask(new_state, plane_state);
+		overlap_mask[tegra->index] = mask;
+
+		/*
+		 * NOTE(review): the literal 3 here and in for_each_set_bit()
+		 * below follows the "maximum 3 planes per-CRTC" assumption,
+		 * while the arrays are sized by TEGRA_DC_LEGACY_PLANES_NUM.
+		 */
+		if (hweight_long(mask) != 3)
+			all_planes_overlap_simultaneously = false;
+	}
+
+	/*
+	 * Then we calculate maximum bandwidth of each plane state.
+	 * The bandwidth includes the plane BW + BW of the "simultaneously"
+	 * overlapping planes, where "simultaneously" means areas where DC
+	 * fetches from the planes simultaneously during the scan-out process.
+	 *
+	 * For example, if plane A overlaps with planes B and C, but B and C
+	 * don't overlap, then the peak bandwidth will be either in area where
+	 * A-and-B or A-and-C planes overlap.
+	 *
+	 * The plane_peak_bw[] contains peak memory bandwidth values of
+	 * each plane, this information is needed by interconnect provider
+	 * in order to set up latency allowance based on the peak BW, see
+	 * tegra_crtc_update_memory_bandwidth().
+	 */
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+		u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0;
+
+		/*
+		 * Note that plane's atomic check doesn't touch the
+		 * total_peak_memory_bandwidth of enabled plane, hence the
+		 * current state contains the old bandwidth state from the
+		 * previous CRTC commit.
+		 */
+		tegra_state = to_const_tegra_plane_state(plane_state);
+		tegra = to_tegra_plane(plane);
+
+		for_each_set_bit(i, &overlap_mask[tegra->index], 3) {
+			/* the plane's own bandwidth is added separately below */
+			if (i == tegra->index)
+				continue;
+
+			if (all_planes_overlap_simultaneously)
+				overlap_bw += plane_peak_bw[i];
+			else
+				overlap_bw = max(overlap_bw, plane_peak_bw[i]);
+		}
+
+		new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw;
+		old_peak_bw = tegra_state->total_peak_memory_bandwidth;
+
+		/*
+		 * If plane's peak bandwidth changed (for example plane isn't
+		 * overlapped anymore) and plane isn't in the atomic state,
+		 * then add plane to the state in order to have the bandwidth
+		 * updated.
+		 */
+		if (old_peak_bw != new_peak_bw) {
+			struct tegra_plane_state *new_tegra_state;
+			struct drm_plane_state *new_plane_state;
+
+			new_plane_state = drm_atomic_get_plane_state(state, plane);
+			if (IS_ERR(new_plane_state))
+				return PTR_ERR(new_plane_state);
+
+			new_tegra_state = to_tegra_plane_state(new_plane_state);
+			new_tegra_state->total_peak_memory_bandwidth = new_peak_bw;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * CRTC atomic-check hook: the only check performed is the memory bandwidth
+ * calculation, whose result (0 or a negative error code) is returned as is.
+ */
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	return tegra_crtc_calculate_memory_bandwidth(crtc, state);
+}
+
+/*
+ * Completion phase of the bandwidth update for @crtc.
+ *
+ * Display bandwidth is allowed to go down only once hardware state is known
+ * to be armed, i.e. state was committed and VBLANK event received, which is
+ * why the update runs here with prepare_bandwidth_transition set to false.
+ */
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	const bool prepare = false;
+
+	tegra_crtc_update_memory_bandwidth(crtc, state, prepare);
+}
+
 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+	.atomic_check = tegra_crtc_atomic_check,
 	.atomic_begin = tegra_crtc_atomic_begin,
 	.atomic_flush = tegra_crtc_atomic_flush,
 	.atomic_enable = tegra_crtc_atomic_enable,
@@ -2199,6 +2536,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): frame end\n", __func__);
 		*/
+		dc->stats.frames_total++;
 		dc->stats.frames++;
 	}

@@ -2207,6 +2545,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
 		*/
 		drm_crtc_handle_vblank(&dc->base);
+		dc->stats.vblank_total++;
 		dc->stats.vblank++;
 	}

@@ -2214,6 +2553,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): underflow\n", __func__);
 		*/
+		dc->stats.underflow_total++;
 		dc->stats.underflow++;
 	}

@@ -2221,11 +2561,13 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): overflow\n", __func__);
 		*/
+		dc->stats.overflow_total++;
 		dc->stats.overflow++;
 	}

 	if (status & HEAD_UF_INT) {
 		dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__);
+		dc->stats.underflow_total++;
 		dc->stats.underflow++;
 	}

@@ -2444,8 +2786,14 @@ static int tegra_dc_runtime_resume(struct host1x_client *client)
 	struct device *dev = client->dev;
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
+		pm_runtime_put_noidle(dev);
+#endif
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
@@ -2506,7 +2854,9 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = true,
+	.has_win_b_vfilter_mem_client = true,
 	.has_win_c_without_vert_filter = true,
+	.plane_tiled_memory_bandwidth_x2 = false,
 };

 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -2526,7 +2876,9 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = true,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = true,
 };

 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2546,7 +2898,9 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = true,
 };

 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2566,7 +2920,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.overlay_formats = tegra124_overlay_formats,
 	.modifiers = tegra124_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = false,
 };

 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2586,7 +2942,9 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra124_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = false,
 };

 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2636,6 +2994,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
 	.has_nvdisplay = true,
 	.wgrps = tegra186_dc_wgrps,
 	.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+	.plane_tiled_memory_bandwidth_x2 = false,
 };

 static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
@@ -2685,6 +3044,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
 	.has_nvdisplay = true,
 	.wgrps = tegra194_dc_wgrps,
 	.num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
+	.plane_tiled_memory_bandwidth_x2 = false,
 };

 static const struct of_device_id tegra_dc_of_match[] = {
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -15,6 +15,8 @@

 struct tegra_output;

+#define TEGRA_DC_LEGACY_PLANES_NUM	7
+
 struct tegra_dc_state {
 	struct drm_crtc_state base;

@@ -33,11 +35,22 @@ static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
 	return NULL;
 }

+/*
+ * Const-preserving wrapper around to_dc_state(): takes a const CRTC state
+ * and hands back whatever to_dc_state() returns, as a const pointer.
+ */
+static inline const struct tegra_dc_state *
+to_const_dc_state(const struct drm_crtc_state *state)
+{
+	/* to_dc_state() takes a non-const pointer, so the qualifier is dropped */
+	struct drm_crtc_state *unqualified = (struct drm_crtc_state *)state;
+
+	return to_dc_state(unqualified);
+}
+
 struct tegra_dc_stats {
 	unsigned long frames;
 	unsigned long vblank;
 	unsigned long underflow;
 	unsigned long overflow;
+
+	unsigned long frames_total;
+	unsigned long vblank_total;
+	unsigned long underflow_total;
+	unsigned long overflow_total;
 };

 struct tegra_windowgroup_soc {
@@ -66,7 +79,9 @@ struct tegra_dc_soc_info {
 	unsigned int num_overlay_formats;
 	const u64 *modifiers;
 	bool has_win_a_without_filters;
+	bool has_win_b_vfilter_mem_client;
 	bool has_win_c_without_vert_filter;
+	bool plane_tiled_memory_bandwidth_x2;
 };

 struct tegra_dc {
@@ -149,6 +164,8 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 			       struct drm_crtc_state *crtc_state,
 			       struct clk *clk, unsigned long pclk,
 			       unsigned int div);
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state);

 /* from rgb.c */
 int tegra_dc_rgb_probe(struct tegra_dc *dc);
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -464,10 +464,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return PTR_ERR(dpaux->regs);

 	dpaux->irq = platform_get_irq(pdev, 0);
-	if (dpaux->irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
+	if (dpaux->irq < 0)
 		return -ENXIO;
-	}

 	if (!pdev->dev.pm_domain) {
 		dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -24,14 +24,15 @@
 #include <drm/drm_prime.h>
 #include <drm/drm_vblank.h>

-#include "uapi.h"
+#include "dc.h"
 #include "drm.h"
 #include "gem.h"
+#include "uapi.h"

 #define DRIVER_NAME "tegra"
 #define DRIVER_DESC "NVIDIA Tegra graphics"
 #define DRIVER_DATE "20120330"
-#define DRIVER_MAJOR 0
+#define DRIVER_MAJOR 1
 #define DRIVER_MINOR 0
 #define DRIVER_PATCHLEVEL 0

@@ -59,6 +60,17 @@ static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
 	.atomic_commit = drm_atomic_helper_commit,
 };

+/*
+ * Run the per-CRTC post-commit step for every CRTC contained in @old_state.
+ * @drm is not used by the body.
+ */
+static void tegra_atomic_post_commit(struct drm_device *drm,
+				     struct drm_atomic_state *old_state)
+{
+	/* required by the iterator macro only, never read here */
+	struct drm_crtc_state *ignored_state __maybe_unused;
+	struct drm_crtc *each_crtc;
+	unsigned int idx;
+
+	for_each_old_crtc_in_state(old_state, each_crtc, ignored_state, idx) {
+		tegra_crtc_atomic_post_commit(each_crtc, old_state);
+	}
+}
+
 static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *drm = old_state->dev;
@@ -78,6 +90,8 @@ static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
 	} else {
 		drm_atomic_helper_commit_tail_rpm(old_state);
 	}
+
+	tegra_atomic_post_commit(drm, old_state);
 }

 static const struct drm_mode_config_helper_funcs
@@ -95,6 +109,7 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)

 	idr_init_base(&fpriv->legacy_contexts, 1);
 	xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1);
+	xa_init(&fpriv->syncpoints);
 	mutex_init(&fpriv->lock);
 	filp->driver_priv = fpriv;

@@ -107,20 +122,6 @@ static void tegra_drm_context_free(struct tegra_drm_context *context)
 	kfree(context);
 }

-static struct host1x_bo *
-host1x_bo_lookup(struct drm_file *file, u32 handle)
-{
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	gem = drm_gem_object_lookup(file, handle);
-	if (!gem)
-		return NULL;
-
-	bo = to_tegra_bo(gem);
-	return &bo->base;
-}
-
 static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 				       struct drm_tegra_reloc __user *src,
 				       struct drm_device *drm,
@@ -151,11 +152,11 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,

 	dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;

-	dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf);
+	dest->cmdbuf.bo = tegra_gem_lookup(file, cmdbuf);
 	if (!dest->cmdbuf.bo)
 		return -ENOENT;

-	dest->target.bo = host1x_bo_lookup(file, target);
+	dest->target.bo = tegra_gem_lookup(file, target);
 	if (!dest->target.bo)
 		return -ENOENT;

@@ -193,7 +194,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		return -EINVAL;

 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
-			       args->num_relocs);
+			       args->num_relocs, false);
 	if (!job)
 		return -ENOMEM;

@@ -238,7 +239,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 			goto fail;
 		}

-		bo = host1x_bo_lookup(file, cmdbuf.handle);
+		bo = tegra_gem_lookup(file, cmdbuf.handle);
 		if (!bo) {
 			err = -ENOENT;
 			goto fail;
@@ -743,13 +744,15 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_drm_ioctl_gem_create,
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate,
 			  DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_drm_ioctl_gem_mmap,
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_WAIT, tegra_drm_ioctl_syncpoint_wait,
 			  DRM_RENDER_ALLOW),

-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE_LEGACY, tegra_gem_create, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP_LEGACY, tegra_gem_mmap, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr,
@@ -868,7 +871,7 @@ static const struct drm_driver tegra_drm_driver = {
 static struct drm_driver tegra_drm_driver = {
 #endif
 	.driver_features = DRIVER_MODESET | DRIVER_GEM |
-			   DRIVER_ATOMIC | DRIVER_RENDER,
+			   DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
 	.lastclose = drm_fb_helper_lastclose,
@@ -1348,8 +1351,10 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra30-hdmi", },
 	{ .compatible = "nvidia,tegra30-gr2d", },
 	{ .compatible = "nvidia,tegra30-gr3d", },
+	{ .compatible = "nvidia,tegra114-dc", },
 	{ .compatible = "nvidia,tegra114-dsi", },
 	{ .compatible = "nvidia,tegra114-hdmi", },
+	{ .compatible = "nvidia,tegra114-gr2d", },
 	{ .compatible = "nvidia,tegra114-gr3d", },
 	{ .compatible = "nvidia,tegra124-dc", },
 	{ .compatible = "nvidia,tegra124-sor", },
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -73,7 +73,12 @@ struct tegra_drm_client;
 struct tegra_drm_context {
 	struct tegra_drm_client *client;
 	struct host1x_channel *channel;
+
+	/* Only used by legacy UAPI. */
 	unsigned int id;
+
+	/* Only used by new UAPI. */
+	struct xarray mappings;
 };

 struct tegra_drm_client_ops {
@@ -95,13 +100,11 @@ struct tegra_drm_client {
 	struct host1x_client base;
 	struct list_head list;
 	struct tegra_drm *drm;
+	struct host1x_channel *shared_channel;

 	/* Set by driver */
 	unsigned int version;
 	const struct tegra_drm_client_ops *ops;
-
-	/* Set by TegraDRM core */
-	struct host1x_channel *shared_channel;
 };

 static inline struct tegra_drm_client *
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -14,6 +14,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/version.h>

 #include <video/mipi_display.h>

@@ -1107,8 +1108,14 @@ static int tegra_dsi_runtime_resume(struct host1x_client *client)
 	struct device *dev = client->dev;
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
+		pm_runtime_put_noidle(dev);
+#endif
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
--- a/drivers/gpu/drm/tegra/uapi/firewall.c
+++ b/drivers/gpu/drm/tegra/uapi/firewall.c
@@ -1,10 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2010-2020 NVIDIA Corporation */

-#include "../drm.h"
-#include "../uapi.h"
-
+#include "drm.h"
 #include "submit.h"
+#include "uapi.h"

 struct tegra_drm_firewall {
 	struct tegra_drm_submit_data *submit;
@@ -108,8 +107,12 @@ static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset)

 static int fw_check_class(struct tegra_drm_firewall *fw, u32 class)
 {
-	if (!fw->client->ops->is_valid_class)
-		return -EINVAL;
+	if (!fw->client->ops->is_valid_class) {
+		if (class == fw->client->base.class)
+			return 0;
+		else
+			return -EINVAL;
+	}

 	if (!fw->client->ops->is_valid_class(class))
 		return -EINVAL;
@@ -118,21 +121,21 @@ static int fw_check_class(struct tegra_drm_firewall *fw, u32 class)
 }

 enum {
-        HOST1X_OPCODE_SETCLASS  = 0x00,
-        HOST1X_OPCODE_INCR      = 0x01,
-        HOST1X_OPCODE_NONINCR   = 0x02,
-        HOST1X_OPCODE_MASK      = 0x03,
-        HOST1X_OPCODE_IMM       = 0x04,
-        HOST1X_OPCODE_RESTART   = 0x05,
-        HOST1X_OPCODE_GATHER    = 0x06,
-        HOST1X_OPCODE_SETSTRMID = 0x07,
-        HOST1X_OPCODE_SETAPPID  = 0x08,
-        HOST1X_OPCODE_SETPYLD   = 0x09,
-        HOST1X_OPCODE_INCR_W    = 0x0a,
-        HOST1X_OPCODE_NONINCR_W = 0x0b,
-        HOST1X_OPCODE_GATHER_W  = 0x0c,
-        HOST1X_OPCODE_RESTART_W = 0x0d,
-        HOST1X_OPCODE_EXTEND    = 0x0e,
+	HOST1X_OPCODE_SETCLASS  = 0x00,
+	HOST1X_OPCODE_INCR      = 0x01,
+	HOST1X_OPCODE_NONINCR   = 0x02,
+	HOST1X_OPCODE_MASK      = 0x03,
+	HOST1X_OPCODE_IMM       = 0x04,
+	HOST1X_OPCODE_RESTART   = 0x05,
+	HOST1X_OPCODE_GATHER    = 0x06,
+	HOST1X_OPCODE_SETSTRMID = 0x07,
+	HOST1X_OPCODE_SETAPPID  = 0x08,
+	HOST1X_OPCODE_SETPYLD   = 0x09,
+	HOST1X_OPCODE_INCR_W    = 0x0a,
+	HOST1X_OPCODE_NONINCR_W = 0x0b,
+	HOST1X_OPCODE_GATHER_W  = 0x0c,
+	HOST1X_OPCODE_RESTART_W = 0x0d,
+	HOST1X_OPCODE_EXTEND    = 0x0e,
 };

 int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
@@ -170,26 +173,46 @@ int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
 			*job_class = class;
 			if (!err)
 				err = fw_check_regs_mask(&fw, offset, mask);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal SETCLASS(offset=0x%x, mask=0x%x, class=0x%x) at word %u",
+					 offset, mask, class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_INCR:
 			offset = (word >> 16) & 0xfff;
 			count = word & 0xffff;
 			err = fw_check_regs_seq(&fw, offset, count, true);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal INCR(offset=0x%x, count=%u) in class 0x%x at word %u",
+					 offset, count, fw.class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_NONINCR:
 			offset = (word >> 16) & 0xfff;
 			count = word & 0xffff;
 			err = fw_check_regs_seq(&fw, offset, count, false);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal NONINCR(offset=0x%x, count=%u) in class 0x%x at word %u",
+					 offset, count, fw.class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_MASK:
 			offset = (word >> 16) & 0xfff;
 			mask = word & 0xffff;
 			err = fw_check_regs_mask(&fw, offset, mask);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal MASK(offset=0x%x, mask=0x%x) in class 0x%x at word %u",
+					 offset, mask, fw.class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_IMM:
 			/* IMM cannot reasonably be used to write a pointer */
 			offset = (word >> 16) & 0xfff;
 			err = fw_check_regs_imm(&fw, offset);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal IMM(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_SETPYLD:
 			payload = word & 0xffff;
@@ -201,6 +224,10 @@ int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,

 			offset = word & 0x3fffff;
 			err = fw_check_regs_seq(&fw, offset, payload, true);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal INCR_W(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
 			break;
 		case HOST1X_OPCODE_NONINCR_W:
 			if (!payload_valid)
@@ -208,8 +235,14 @@ int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,

 			offset = word & 0x3fffff;
 			err = fw_check_regs_seq(&fw, offset, payload, false);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal NONINCR(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
 			break;
 		default:
+			dev_warn(client->base.dev, "illegal opcode at word %u",
+				 fw.pos-1);
 			return -EINVAL;
 		}

--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -26,13 +26,13 @@
 MODULE_IMPORT_NS(DMA_BUF);
 #endif

-static unsigned int __sgt_dma_count_chunks(struct sg_table *sgt)
+static unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents)
 {
 	dma_addr_t next = ~(dma_addr_t)0;
 	unsigned int count = 0, i;
 	struct scatterlist *s;

-	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+	for_each_sg(sgl, s, nents, i) {
 		/* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */
 		if (!sg_dma_len(s))
 			continue;
@@ -46,6 +46,11 @@ static unsigned int __sgt_dma_count_chunks(struct sg_table *sgt)
 	return count;
 }

+static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt)
+{
+	return sg_dma_count_chunks(sgt->sgl, sgt->nents); /* whole-table convenience wrapper */
+}
+
 static void tegra_bo_put(struct host1x_bo *bo)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
@@ -89,7 +94,7 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_
 			goto free;
 		}

-		err = __sgt_dma_count_chunks(map->sgt);
+		err = sgt_dma_count_chunks(map->sgt);
 		map->size = gem->size;

 		goto out;
@@ -487,7 +492,6 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
 	}

 	bo->gem.import_attach = attach;
-	bo->gem.resv = buf->resv;

 	return bo;

@@ -764,7 +768,6 @@ struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem,
 	exp_info.size = gem->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem;
-	exp_info.resv = gem->resv;

 	return drm_gem_dmabuf_export(gem->dev, &exp_info);
 }
@@ -789,3 +792,16 @@ struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,

 	return &bo->gem;
 }
+
+struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle)
+{
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+
+	gem = drm_gem_object_lookup(file, handle); /* resolve @handle for @file */
+	if (!gem)
+		return NULL; /* @handle did not resolve to a GEM object */
+
+	bo = to_tegra_bo(gem);
+	return &bo->base; /* NOTE(review): the lookup reference presumably passes to the caller - confirm */
+}
--- a/drivers/gpu/drm/tegra/gem.h
+++ b/drivers/gpu/drm/tegra/gem.h
@@ -80,4 +80,6 @@ struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem,
 struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
 					      struct dma_buf *buf);

+struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle);
+
 #endif
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -161,9 +161,14 @@ static const struct gr2d_soc tegra30_gr2d_soc = {
 	.version = 0x30,
 };

+static const struct gr2d_soc tegra114_gr2d_soc = {
+	.version = 0x35,
+};
+
 static const struct of_device_id gr2d_match[] = {
-	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra20_gr2d_soc },
-	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra30_gr2d_soc },
+	{ .compatible = "nvidia,tegra114-gr2d", .data = &tegra114_gr2d_soc },
+	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra30_gr2d_soc },
+	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra20_gr2d_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, gr2d_match);
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -14,6 +14,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/version.h>

 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
@@ -1506,8 +1507,14 @@ static int tegra_hdmi_runtime_resume(struct host1x_client *client)
 	struct device *dev = client->dev;
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
+		pm_runtime_put_noidle(dev);
+#endif
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -1009,8 +1009,14 @@ static int tegra_display_hub_runtime_resume(struct host1x_client *client)
 	unsigned int i;
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
+		pm_runtime_put_noidle(dev);
+#endif
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
--- a/drivers/gpu/drm/tegra/hub.h
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -72,7 +72,6 @@ to_tegra_display_hub_state(struct drm_private_state *priv)
 	return container_of(priv, struct tegra_display_hub_state, base);
 }

-struct tegra_dc;
 struct tegra_plane;

 int tegra_display_hub_prepare(struct tegra_display_hub *hub);
--- a/drivers/gpu/drm/tegra/include/uapi/drm/tegra_drm_next.h
+++ b/drivers/gpu/drm/tegra/include/uapi/drm/tegra_drm_next.h
@@ -10,7 +10,7 @@
 extern "C" {
 #endif

-/* TegraDRM legacy UAPI. Only enabled with STAGING */
+/* Tegra DRM legacy UAPI. Only enabled with STAGING */

 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
@@ -627,8 +627,8 @@ struct drm_tegra_gem_get_flags {
 	__u32 flags;
 };

-#define DRM_TEGRA_GEM_CREATE_LEGACY	0x00
-#define DRM_TEGRA_GEM_MMAP_LEGACY	0x01
+#define DRM_TEGRA_GEM_CREATE		0x00
+#define DRM_TEGRA_GEM_MMAP		0x01
 #define DRM_TEGRA_SYNCPT_READ		0x02
 #define DRM_TEGRA_SYNCPT_INCR		0x03
 #define DRM_TEGRA_SYNCPT_WAIT		0x04
@@ -642,8 +642,8 @@ struct drm_tegra_gem_get_flags {
 #define DRM_TEGRA_GEM_SET_FLAGS		0x0c
 #define DRM_TEGRA_GEM_GET_FLAGS		0x0d

-#define DRM_IOCTL_TEGRA_GEM_CREATE_LEGACY DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE_LEGACY, struct drm_tegra_gem_create)
-#define DRM_IOCTL_TEGRA_GEM_MMAP_LEGACY DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP_LEGACY, struct drm_tegra_gem_mmap)
+#define DRM_IOCTL_TEGRA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE, struct drm_tegra_gem_create)
+#define DRM_IOCTL_TEGRA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP, struct drm_tegra_gem_mmap)
 #define DRM_IOCTL_TEGRA_SYNCPT_READ DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_READ, struct drm_tegra_syncpt_read)
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
@@ -657,7 +657,15 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags)
 #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags)

-/* New TegraDRM UAPI */
+/* New Tegra DRM UAPI */
+
+/*
+ * Reported by the driver in the `capabilities` field.
+ *
+ * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent
+ * with regard to the system memory.
+ */
+#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0)

 struct drm_tegra_channel_open {
 	/**
@@ -676,39 +684,57 @@ struct drm_tegra_channel_open {
 	__u32 flags;

 	/**
-	 * @channel_ctx: [out]
+	 * @context: [out]
 	 *
 	 * Opaque identifier corresponding to the opened channel.
 	 */
-	__u32 channel_ctx;
+	__u32 context;

 	/**
-	 * @hardware_version: [out]
+	 * @version: [out]
 	 *
 	 * Version of the engine hardware. This can be used by userspace
 	 * to determine how the engine needs to be programmed.
 	 */
-	__u32 hardware_version;
+	__u32 version;
+
+	/**
+	 * @capabilities: [out]
+	 *
+	 * Flags describing the hardware capabilities.
+	 */
+	__u32 capabilities;
+	__u32 padding;
 };

 struct drm_tegra_channel_close {
 	/**
-	 * @channel_ctx: [in]
+	 * @context: [in]
 	 *
 	 * Identifier of the channel to close.
 	 */
-	__u32 channel_ctx;
+	__u32 context;
+	__u32 padding;
 };

-#define DRM_TEGRA_CHANNEL_MAP_READWRITE			(1<<0)
+/*
+ * Mapping flags that can be used to influence how the mapping is created.
+ *
+ * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access
+ * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access
+ */
+#define DRM_TEGRA_CHANNEL_MAP_READ  (1 << 0)
+#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1)
+#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE (DRM_TEGRA_CHANNEL_MAP_READ | \
+					  DRM_TEGRA_CHANNEL_MAP_WRITE)

 struct drm_tegra_channel_map {
 	/**
-	 * @channel_ctx: [in]
+	 * @context: [in]
 	 *
 	 * Identifier of the channel to which make memory available for.
 	 */
-	__u32 channel_ctx;
+	__u32 context;

 	/**
 	 * @handle: [in]
@@ -725,47 +751,46 @@ struct drm_tegra_channel_map {
 	__u32 flags;

 	/**
-	 * @mapping_id: [out]
+	 * @mapping: [out]
 	 *
 	 * Identifier corresponding to the mapping, to be used for
 	 * relocations or unmapping later.
 	 */
-	__u32 mapping_id;
+	__u32 mapping;
 };

 struct drm_tegra_channel_unmap {
 	/**
-	 * @channel_ctx: [in]
+	 * @context: [in]
 	 *
 	 * Channel identifier of the channel to unmap memory from.
 	 */
-	__u32 channel_ctx;
+	__u32 context;

 	/**
-	 * @mapping_id: [in]
+	 * @mapping: [in]
 	 *
 	 * Mapping identifier of the memory mapping to unmap.
 	 */
-	__u32 mapping_id;
+	__u32 mapping;
 };

 /* Submission */

 /**
- * Specify that bit 39 of the patched-in address should be set to
- * trigger layout swizzling between Tegra and non-Tegra Blocklinear
- * layout on systems that store surfaces in system memory in non-Tegra
- * Blocklinear layout.
+ * Specify that bit 39 of the patched-in address should be set to switch
+ * swizzling between Tegra and non-Tegra sector layout on systems that store
+ * surfaces in system memory in non-Tegra sector layout.
 */
-#define DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR		(1<<0)
+#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0)

 struct drm_tegra_submit_buf {
 	/**
-	 * @mapping_id: [in]
+	 * @mapping: [in]
 	 *
 	 * Identifier of the mapping to use in the submission.
 	 */
-	__u32 mapping_id;
+	__u32 mapping;

 	/**
 	 * @flags: [in]
@@ -775,10 +800,7 @@ struct drm_tegra_submit_buf {
 	__u32 flags;

 	/**
-	 * Information for relocation patching. Relocation patching will
-	 * be done if the MAP IOCTL that created `mapping_id` did not
-	 * return an IOVA. If an IOVA was returned, the application is
-	 * responsible for patching the address into the gather.
+	 * Information for relocation patching.
 	 */
 	struct {
 		/**
@@ -807,44 +829,6 @@ struct drm_tegra_submit_buf {
 	} reloc;
 };

-struct drm_tegra_submit_syncpt_incr {
-	/**
-	 * @syncpt_fd: [in]
-	 *
-	 * Syncpoint file descriptor of the syncpoint that the job will
-	 * increment.
-	 */
-	__s32 syncpt_fd;
-
-	/**
-	 * @flags: [in]
-	 *
-	 * Flags.
-	 */
-	__u32 flags;
-
-	/**
-	 * @num_incrs: [in]
-	 *
-	 * Number of times the job will increment this syncpoint.
-	 */
-	__u32 num_incrs;
-
-	/**
-	 * @fence_value: [out]
-	 *
-	 * Value the syncpoint will have once the job has completed all
-	 * its specified syncpoint increments.
-	 *
-	 * Note that the kernel may increment the syncpoint before or after
-	 * the job. These increments are not reflected in this field.
-	 *
-	 * If the job hangs or times out, not all of the increments may
-	 * get executed.
-	 */
-	__u32 fence_value;
-};
-
 /**
 * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr`
 * buffer. Each GATHER_UPTR command uses successive words from the buffer.
@@ -855,6 +839,11 @@ struct drm_tegra_submit_syncpt_incr {
 * commands.
 */
 #define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT		1
+/**
+ * Wait for a syncpoint to reach a value before continuing with further
+ * commands. The threshold is calculated relative to the start of the job.
+ */
+#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE	2

 struct drm_tegra_submit_cmd_gather_uptr {
 	__u32 words;
@@ -863,7 +852,7 @@ struct drm_tegra_submit_cmd_gather_uptr {

 struct drm_tegra_submit_cmd_wait_syncpt {
 	__u32 id;
-	__u32 threshold;
+	__u32 value;
 	__u32 reserved[2];
 };

@@ -890,13 +879,50 @@ struct drm_tegra_submit_cmd {
 	};
 };

+struct drm_tegra_submit_syncpt {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of the syncpoint that the job will increment.
+	 */
+	__u32 id;
+
+	/**
+	 * @flags: [in]
+	 *
+	 * Flags.
+	 */
+	__u32 flags;
+
+	/**
+	 * @increments: [in]
+	 *
+	 * Number of times the job will increment this syncpoint.
+	 */
+	__u32 increments;
+
+	/**
+	 * @value: [out]
+	 *
+	 * Value the syncpoint will have once the job has completed all
+	 * its specified syncpoint increments.
+	 *
+	 * Note that the kernel may increment the syncpoint before or after
+	 * the job. These increments are not reflected in this field.
+	 *
+	 * If the job hangs or times out, not all of the increments may
+	 * get executed.
+	 */
+	__u32 value;
+};
+
 struct drm_tegra_channel_submit {
 	/**
-	 * @channel_ctx: [in]
+	 * @context: [in]
 	 *
 	 * Identifier of the channel to submit this job to.
 	 */
-	__u32 channel_ctx;
+	__u32 context;

 	/**
 	 * @num_bufs: [in]
@@ -941,22 +967,91 @@ struct drm_tegra_channel_submit {
 	 */
 	__u64 gather_data_ptr;

+	/**
+	 * @syncobj_in: [in]
+	 *
+	 * Handle for DRM syncobj that will be waited before submission.
+	 * Ignored if zero.
+	 */
+	__u32 syncobj_in;
+
+	/**
+	 * @syncobj_out: [in]
+	 *
+	 * Handle for DRM syncobj that will have its fence replaced with
+	 * the job's completion fence. Ignored if zero.
+	 */
+	__u32 syncobj_out;
+
 	/**
 	 * @syncpt_incr: [in,out]
 	 *
 	 * Information about the syncpoint the job will increment.
 	 */
-	struct drm_tegra_submit_syncpt_incr syncpt_incr;
+	struct drm_tegra_submit_syncpt syncpt;
 };

-#define DRM_IOCTL_TEGRA_CHANNEL_OPEN     DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open)
-#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE    DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close)
-#define DRM_IOCTL_TEGRA_CHANNEL_MAP      DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map)
-#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP    DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap)
-#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT   DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit)
+struct drm_tegra_syncpoint_allocate {
+	/**
+	 * @id: [out]
+	 *
+	 * ID of allocated syncpoint.
+	 */
+	__u32 id;
+	__u32 padding;
+};

-#define DRM_IOCTL_TEGRA_GEM_CREATE       DRM_IOWR(DRM_COMMAND_BASE + 0x15, struct drm_tegra_gem_create)
-#define DRM_IOCTL_TEGRA_GEM_MMAP         DRM_IOWR(DRM_COMMAND_BASE + 0x16, struct drm_tegra_gem_mmap)
+struct drm_tegra_syncpoint_free {
+	/**
+	 * @id: [in]
+	 *
+	 * ID of syncpoint to free.
+	 */
+	__u32 id;
+	__u32 padding;
+};
+
+struct drm_tegra_syncpoint_wait {
+	/**
+	 * @timeout_ns: [in]
+	 *
+	 * Absolute timestamp at which the wait will time out.
+	 */
+	__s64 timeout_ns;
+
+	/**
+	 * @id: [in]
+	 *
+	 * ID of syncpoint to wait on.
+	 */
+	__u32 id;
+
+	/**
+	 * @threshold: [in]
+	 *
+	 * Threshold to wait for.
+	 */
+	__u32 threshold;
+
+	/**
+	 * @value: [out]
+	 *
+	 * Value of the syncpoint upon wait completion.
+	 */
+	__u32 value;
+
+	__u32 padding; /* explicit padding: 8 + 4 * 4 = 24 bytes, a multiple of 8 */
+};
+
+#define DRM_IOCTL_TEGRA_CHANNEL_OPEN DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open)
+#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close)
+#define DRM_IOCTL_TEGRA_CHANNEL_MAP DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map)
+#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap)
+#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit)
+
+#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate)
+#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free)
+#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait)

 #if defined(__cplusplus)
 }
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -25,7 +25,6 @@ struct nvdec_config {
 	const char *firmware;
 	unsigned int version;
 	bool supports_sid;
-	unsigned int num_instances;
 };

 struct nvdec {
@@ -314,7 +313,6 @@ static const struct nvdec_config nvdec_t210_config = {
 	.firmware = NVIDIA_TEGRA_210_NVDEC_FIRMWARE,
 	.version = 0x21,
 	.supports_sid = false,
-	.num_instances = 1,
 };

 #define NVIDIA_TEGRA_186_NVDEC_FIRMWARE "nvidia/tegra186/nvdec.bin"
@@ -323,7 +321,6 @@ static const struct nvdec_config nvdec_t186_config = {
 	.firmware = NVIDIA_TEGRA_186_NVDEC_FIRMWARE,
 	.version = 0x18,
 	.supports_sid = true,
-	.num_instances = 1,
 };

 #define NVIDIA_TEGRA_194_NVDEC_FIRMWARE "nvidia/tegra194/nvdec.bin"
@@ -332,7 +329,6 @@ static const struct nvdec_config nvdec_t194_config = {
 	.firmware = NVIDIA_TEGRA_194_NVDEC_FIRMWARE,
 	.version = 0x19,
 	.supports_sid = true,
-	.num_instances = 2,
 };

 static const struct of_device_id tegra_nvdec_of_match[] = {
@@ -384,8 +380,7 @@ static int nvdec_probe(struct platform_device *pdev)
 		return err;
 	}

-	err = of_property_read_u32(dev->of_node, "nvidia,host1x-class",
-				   &host_class);
+	err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class);
 	if (err < 0)
 		host_class = HOST1X_CLASS_NVDEC;

--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -4,6 +4,7 @@
 */

 #include <linux/iommu.h>
+#include <linux/interconnect.h>
 #include <linux/version.h>

 #include <drm/drm_atomic.h>
@@ -69,6 +70,9 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 	copy->reflect_x = state->reflect_x;
 	copy->reflect_y = state->reflect_y;
 	copy->opaque = state->opaque;
+	copy->total_peak_memory_bandwidth = state->total_peak_memory_bandwidth;
+	copy->peak_memory_bandwidth = state->peak_memory_bandwidth;
+	copy->avg_memory_bandwidth = state->avg_memory_bandwidth;

 	for (i = 0; i < 2; i++)
 		copy->blending[i] = state->blending[i];
@@ -201,15 +205,18 @@ int tegra_plane_prepare_fb(struct drm_plane *plane,
 			   struct drm_plane_state *state)
 {
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	int err;

 	if (!state->fb)
 		return 0;

 #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0)
-	drm_gem_plane_helper_prepare_fb(plane, state);
+	err = drm_gem_plane_helper_prepare_fb(plane, state);
 #else
-	drm_gem_fb_prepare_fb(plane, state);
+	err = drm_gem_fb_prepare_fb(plane, state);
 #endif
+	if (err < 0)
+		return err;

 	return tegra_dc_pin(dc, to_tegra_plane_state(state));
 }
@@ -223,6 +230,78 @@ void tegra_plane_cleanup_fb(struct drm_plane *plane,
 		tegra_dc_unpin(dc, to_tegra_plane_state(state));
 }

+static int tegra_plane_calculate_memory_bandwidth(struct drm_plane_state *state)
+{
+	struct tegra_plane_state *tegra_state = to_tegra_plane_state(state);
+	unsigned int i, bpp, dst_w, dst_h, src_w, src_h, mul;
+	const struct tegra_dc_soc_info *soc;
+	const struct drm_format_info *fmt;
+	struct drm_crtc_state *crtc_state;
+	u64 avg_bandwidth, peak_bandwidth;
+
+	if (!state->visible)
+		return 0; /* nothing is computed; the bandwidth fields are left as they were */
+
+	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
+	if (!crtc_state)
+		return -EINVAL;
+
+	src_w = drm_rect_width(&state->src) >> 16; /* src extents are shifted down by 16 bits */
+	src_h = drm_rect_height(&state->src) >> 16;
+	dst_w = drm_rect_width(&state->dst); /* dst extents are used as-is */
+	dst_h = drm_rect_height(&state->dst);
+
+	fmt = state->fb->format;
+	soc = to_tegra_dc(state->crtc)->soc;
+
+	/*
+	 * Note that the real memory bandwidth varies depending on format and
+	 * memory layout; we are not taking that into account because a small
+	 * estimation error isn't important since the bandwidth is rounded up
+	 * anyway.
+	 */
+	for (i = 0, bpp = 0; i < fmt->num_planes; i++) {
+		unsigned int bpp_plane = fmt->cpp[i] * 8; /* cpp[] * 8 gives bits per pixel */
+
+		/*
+		 * Sub-sampling is relevant for chroma planes only and vertical
+		 * readouts are not cached, hence only horizontal sub-sampling
+		 * matters.
+		 */
+		if (i > 0)
+			bpp_plane /= fmt->hsub;
+
+		bpp += bpp_plane; /* bpp accumulates the total over all format planes */
+	}
+
+	/* average bandwidth in kbytes/sec */
+	avg_bandwidth  = min(src_w, dst_w) * min(src_h, dst_h); /* smaller of src/dst in each dimension */
+	avg_bandwidth *= drm_mode_vrefresh(&crtc_state->adjusted_mode);
+	avg_bandwidth  = DIV_ROUND_UP(avg_bandwidth * bpp, 8) + 999; /* bits -> bytes, rounded up */
+	do_div(avg_bandwidth, 1000); /* the +999 above makes this division round up */
+
+	/* mode.clock in kHz, peak bandwidth in kbytes/sec */
+	peak_bandwidth = DIV_ROUND_UP(crtc_state->adjusted_mode.clock * bpp, 8);
+
+	/*
+	 * Tegra30/114 Memory Controller can't interleave DC memory requests
+	 * for the tiled windows because DC uses 16-bytes atom, while DDR3
+	 * uses 32-bytes atom.  Hence there is x2 memory overfetch for tiled
+	 * framebuffer and DDR3 on these SoCs.
+	 */
+	if (soc->plane_tiled_memory_bandwidth_x2 &&
+	    tegra_state->tiling.mode == TEGRA_BO_TILING_MODE_TILED)
+		mul = 2;
+	else
+		mul = 1;
+
+	/* ICC bandwidth in kbytes/sec */
+	tegra_state->peak_memory_bandwidth = kBps_to_icc(peak_bandwidth) * mul;
+	tegra_state->avg_memory_bandwidth  = kBps_to_icc(avg_bandwidth)  * mul;
+
+	return 0; /* results are stored in tegra_state; only 0 or -EINVAL is returned */
+}
+
 int tegra_plane_state_add(struct tegra_plane *plane,
 			  struct drm_plane_state *state)
 {
@@ -241,6 +320,10 @@ int tegra_plane_state_add(struct tegra_plane *plane,
 	if (err < 0)
 		return err;

+	err = tegra_plane_calculate_memory_bandwidth(state);
+	if (err < 0)
+		return err;
+
 	tegra = to_dc_state(crtc_state);

 	tegra->planes |= WIN_A_ACT_REQ << plane->index;
@@ -625,3 +708,40 @@ int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,

 	return 0;
 }
+
+static const char * const tegra_plane_icc_names[TEGRA_DC_LEGACY_PLANES_NUM] = {
+	"wina", "winb", "winc", NULL, NULL, NULL, "cursor",
+};
+
+/* Acquire the plane's ICC path(s); returns 0 on success or a negative errno. */
+int tegra_plane_interconnect_init(struct tegra_plane *plane)
+{
+	struct device *dev = plane->dc->dev;
+	struct tegra_dc *dc = plane->dc;
+	const char *icc_name;
+	int err;
+
+	/* validate the index before it is used to look up the path name */
+	if (WARN_ON(plane->index >= TEGRA_DC_LEGACY_PLANES_NUM) ||
+	    WARN_ON(!tegra_plane_icc_names[plane->index]))
+		return -EINVAL;
+
+	icc_name = tegra_plane_icc_names[plane->index];
+
+	plane->icc_mem = devm_of_icc_get(dev, icc_name);
+	err = PTR_ERR_OR_ZERO(plane->icc_mem);
+	if (err)
+		return dev_err_probe(dev, err, "failed to get %s interconnect\n",
+				     icc_name);
+
+	/* plane B on T20/30 has a dedicated memory client for a 6-tap vertical filter */
+	if (plane->index == 1 && dc->soc->has_win_b_vfilter_mem_client) {
+		plane->icc_mem_vfilter = devm_of_icc_get(dev, "winb-vfilter");
+		err = PTR_ERR_OR_ZERO(plane->icc_mem_vfilter);
+		if (err)
+			return dev_err_probe(dev, err, "failed to get %s interconnect\n",
+					     "winb-vfilter");
+	}
+
+	return 0;
+}
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -8,6 +8,7 @@

 #include <drm/drm_plane.h>

+struct icc_path;
 struct tegra_bo;
 struct tegra_dc;

@@ -16,6 +17,9 @@ struct tegra_plane {
 	struct tegra_dc *dc;
 	unsigned int offset;
 	unsigned int index;
+
+	struct icc_path *icc_mem;
+	struct icc_path *icc_mem_vfilter;
 };

 struct tegra_cursor {
@@ -52,6 +56,11 @@ struct tegra_plane_state {
 	/* used for legacy blending support only */
 	struct tegra_plane_legacy_blending_state blending[2];
 	bool opaque;
+
+	/* bandwidths are in ICC units, i.e. kbytes/sec */
+	u32 total_peak_memory_bandwidth;
+	u32 peak_memory_bandwidth;
+	u32 avg_memory_bandwidth;
 };

 static inline struct tegra_plane_state *
@@ -63,6 +72,12 @@ to_tegra_plane_state(struct drm_plane_state *state)
 	return NULL;
 }

+static inline const struct tegra_plane_state *
+to_const_tegra_plane_state(const struct drm_plane_state *state)
+{
+	return to_tegra_plane_state((struct drm_plane_state *)state);
+}
+
 extern const struct drm_plane_funcs tegra_plane_funcs;

 int tegra_plane_prepare_fb(struct drm_plane *plane,
@@ -78,5 +93,6 @@ bool tegra_plane_format_is_indexed(unsigned int format);
 bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc);
 int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
 				   struct tegra_plane_state *state);
+int tegra_plane_interconnect_init(struct tegra_plane *plane);

 #endif /* TEGRA_PLANE_H */
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -275,11 +275,8 @@ int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc)
 	if (output->bridge) {
 		err = drm_bridge_attach(&output->encoder, output->bridge,
 					NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR);
-		if (err) {
-			dev_err(output->dev, "failed to attach bridge: %d\n",
-				err);
+		if (err)
 			return err;
-		}

 		connector = drm_bridge_connector_init(drm, &output->encoder);
 		if (IS_ERR(connector)) {
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -13,6 +13,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/version.h>

 #include <soc/tegra/pmc.h>

@@ -3111,9 +3112,14 @@ static int tegra_sor_init(struct host1x_client *client)
 	 * kernel is possible.
 	 */
 	if (sor->rst) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+		err = pm_runtime_resume_and_get(sor->dev);
+		if (err < 0) {
+#else
 		err = pm_runtime_get_sync(sor->dev);
 		if (err < 0) {
 			pm_runtime_put_noidle(sor->dev);
+#endif
 			dev_err(sor->dev, "failed to get runtime PM: %d\n", err);
 			return err;
 		}
@@ -3228,8 +3234,14 @@ static int tegra_sor_runtime_resume(struct host1x_client *client)
 	struct device *dev = client->dev;
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
+		pm_runtime_put_noidle(dev);
+#endif
 		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-fence-array.h>
+#include <linux/dma-mapping.h>
+#include <linux/file.h>
+#include <linux/host1x-next.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+#include <linux/version.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
+
+#include "drm.h"
+#include "gem.h"
+#include "submit.h"
+#include "uapi.h"
+
+#define SUBMIT_ERR(context, fmt, ...) \
+	dev_err_ratelimited((context)->client->base.dev, \
+		"%s: job submission failed: " fmt "\n", \
+		current->comm, ##__VA_ARGS__)
+
+struct gather_bo {
+	struct host1x_bo base; /* embedded so container_of() can recover the gather_bo */
+
+	struct kref ref; /* refcount; gather_bo_release() runs when it drops to zero */
+
+	struct device *dev; /* device passed to dma_alloc_attrs()/dma_free_attrs() */
+	u32 *gather_data; /* CPU pointer to the gather words copied from userspace */
+	dma_addr_t gather_data_dma; /* DMA handle returned together with gather_data */
+	size_t gather_data_words; /* length of gather_data in 32-bit words */
+};
+
+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_get(&bo->ref); /* paired with the kref_put() in gather_bo_put() */
+
+	return host_bo; /* hands back the same pointer it was given */
+}
+
+static void gather_bo_release(struct kref *ref)
+{
+	struct gather_bo *bo = container_of(ref, struct gather_bo, ref);
+
+	dma_free_attrs(bo->dev, bo->gather_data_words * 4, bo->gather_data, bo->gather_data_dma,
+		       0); /* size in bytes: gather_data_words u32 words * 4 */
+	kfree(bo); /* the gather_bo itself was kzalloc()ed in submit_copy_gather_data() */
+}
+
+static void gather_bo_put(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_put(&bo->ref, gather_bo_release); /* gather_bo_release() frees the BO on the last put */
+}
+
+static struct host1x_bo_mapping *
+gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
+{
+	struct gather_bo *gather = container_of(bo, struct gather_bo, base);
+	struct host1x_bo_mapping *map;
+	int err;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&map->ref);
+	map->direction = direction;
+	map->dev = dev;
+
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma,
+			      gather->gather_data_words * 4);
+	if (err)
+		goto free_sgt;
+
+	err = dma_map_sgtable(dev, map->sgt, direction, 0);
+	if (err)
+		goto free_sgt;
+
+	map->phys = sg_dma_address(map->sgt->sgl);
+	map->size = gather->gather_data_words * 4;
+	map->chunks = err; /* err is 0 on this path */
+	map->bo = host1x_bo_get(bo); /* taken last: the error paths below never put it */
+
+	return map;
+
+free_sgt:
+	sg_free_table(map->sgt);
+	kfree(map->sgt);
+free:
+	kfree(map);
+	return ERR_PTR(err);
+}
+
+static void gather_bo_unpin(struct host1x_bo_mapping *map)
+{
+	if (!map)
+		return; /* a NULL mapping is accepted and ignored */
+
+	dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); /* same dev/direction as stored at pin time */
+	sg_free_table(map->sgt);
+	kfree(map->sgt);
+	host1x_bo_put(map->bo); /* drops the reference taken by host1x_bo_get() in gather_bo_pin() */
+
+	kfree(map);
+}
+
+static void *gather_bo_mmap(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	return bo->gather_data; /* existing CPU pointer from dma_alloc_attrs(); no new mapping is made */
+}
+
+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
+{ /* intentionally empty: gather_bo_mmap() creates nothing that needs undoing */
+}
+
+const struct host1x_bo_ops gather_bo_ops = { /* handed to host1x_bo_init() in submit_copy_gather_data() */
+	.get = gather_bo_get,
+	.put = gather_bo_put,
+	.pin = gather_bo_pin,
+	.unpin = gather_bo_unpin,
+	.mmap = gather_bo_mmap,
+	.munmap = gather_bo_munmap, /* no-op counterpart of .mmap */
+};
+
+static struct tegra_drm_mapping *
+tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id)
+{
+	struct tegra_drm_mapping *mapping;
+
+	xa_lock(&context->mappings); /* lookup and kref_get() both happen under the xarray lock */
+
+	mapping = xa_load(&context->mappings, id);
+	if (mapping)
+		kref_get(&mapping->ref);
+
+	xa_unlock(&context->mappings);
+
+	return mapping; /* NULL when @id has no entry; otherwise carries the reference taken above */
+}
+
+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
+{
+	size_t copy_len;
+	void *data;
+
+	if (check_mul_overflow(count, size, &copy_len)) /* copy_len = count * size, overflow-checked */
+		return ERR_PTR(-EINVAL);
+
+	if (copy_len > 0x4000) /* a single copy is capped at 0x4000 (16384) bytes */
+		return ERR_PTR(-E2BIG);
+
+	data = kvmalloc(copy_len, GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(data, from, copy_len)) {
+		kvfree(data);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return data; /* failures are reported via ERR_PTR(), never NULL; release with kvfree() */
+}
+
+static int submit_copy_gather_data(struct gather_bo **pbo, struct device *dev,
+				   struct tegra_drm_context *context,
+				   struct drm_tegra_channel_submit *args)
+{
+	struct gather_bo *bo;
+	size_t copy_len;
+
+	if (args->gather_data_words == 0) {
+		SUBMIT_ERR(context, "gather_data_words cannot be zero");
+		return -EINVAL;
+	}
+
+	if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len)) { /* words -> bytes */
+		SUBMIT_ERR(context, "gather_data_words is too large");
+		return -EINVAL;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		SUBMIT_ERR(context, "failed to allocate memory for bo info");
+		return -ENOMEM;
+	}
+
+	host1x_bo_init(&bo->base, &gather_bo_ops);
+	kref_init(&bo->ref); /* the reference set up here is handed to the caller through *pbo */
+	bo->dev = dev;
+
+	bo->gather_data = dma_alloc_attrs(dev, copy_len, &bo->gather_data_dma,
+					  GFP_KERNEL | __GFP_NOWARN, 0);
+	if (!bo->gather_data) {
+		SUBMIT_ERR(context, "failed to allocate memory for gather data");
+		kfree(bo);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(bo->gather_data, u64_to_user_ptr(args->gather_data_ptr), copy_len)) {
+		SUBMIT_ERR(context, "failed to copy gather data from userspace");
+		dma_free_attrs(dev, copy_len, bo->gather_data, bo->gather_data_dma, 0); /* freed by hand: gather_data_words is not set yet */
+		kfree(bo);
+		return -EFAULT;
+	}
+
+	bo->gather_data_words = args->gather_data_words; /* gather_bo_release() sizes its free from this */
+
+	*pbo = bo; /* *pbo is written only on success */
+
+	return 0;
+}
+
+static int submit_write_reloc(struct tegra_drm_context *context, struct gather_bo *bo,
+			      struct drm_tegra_submit_buf *buf, struct tegra_drm_mapping *mapping)
+{
+	/* TODO check that target_offset is within bounds */
+	dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
+	u32 written_ptr;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (buf->flags & DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT)
+		iova |= BIT_ULL(39); /* bit 39 is only set when dma_addr_t is 64 bits wide */
+#endif
+
+	written_ptr = iova >> buf->reloc.shift; /* NOTE(review): shift is not range-checked here - confirm it is bounded */
+
+	if (buf->reloc.gather_offset_words >= bo->gather_data_words) {
+		SUBMIT_ERR(context,
+			   "relocation has too large gather offset (%u vs gather length %zu)",
+			   buf->reloc.gather_offset_words, bo->gather_data_words);
+		return -EINVAL;
+	}
+
+	buf->reloc.gather_offset_words = array_index_nospec(buf->reloc.gather_offset_words,
+							    bo->gather_data_words); /* clamp the index after the bounds check */
+
+	bo->gather_data[buf->reloc.gather_offset_words] = written_ptr; /* patch one 32-bit word of the gather */
+
+	return 0;
+}
+
+static int submit_process_bufs(struct tegra_drm_context *context, struct gather_bo *bo,
+			       struct drm_tegra_channel_submit *args,
+			       struct tegra_drm_submit_data *job_data)
+{
+	struct tegra_drm_used_mapping *mappings;
+	struct drm_tegra_submit_buf *bufs;
+	int err;
+	u32 i;
+
+	bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr), args->num_bufs,
+				     sizeof(*bufs));
+	if (IS_ERR(bufs)) {
+		SUBMIT_ERR(context, "failed to copy bufs array from userspace");
+		return PTR_ERR(bufs);
+	}
+
+	mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL);
+	if (!mappings) {
+		SUBMIT_ERR(context, "failed to allocate memory for mapping info");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < args->num_bufs; i++) {
+		struct drm_tegra_submit_buf *buf = &bufs[i];
+		struct tegra_drm_mapping *mapping;
+
+		if (buf->flags & ~DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) { /* only this one flag is accepted */
+			SUBMIT_ERR(context, "invalid flag specified for buffer");
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		mapping = tegra_drm_mapping_get(context, buf->mapping); /* takes a reference on success */
+		if (!mapping) {
+			SUBMIT_ERR(context, "invalid mapping ID '%u' for buffer", buf->mapping);
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		err = submit_write_reloc(context, bo, buf, mapping);
+		if (err) {
+			tegra_drm_mapping_put(mapping); /* not stored in mappings[] yet, so drop it here */
+			goto drop_refs;
+		}
+
+		mappings[i].mapping = mapping;
+		mappings[i].flags = buf->flags;
+	}
+
+	job_data->used_mappings = mappings; /* array and its references now belong to job_data */
+	job_data->num_used_mappings = i; /* i == args->num_bufs once the loop has completed */
+
+	err = 0;
+
+	goto done;
+
+drop_refs:
+	while (i--) /* unwinds only the entries already stored in mappings[] */
+		tegra_drm_mapping_put(mappings[i].mapping);
+
+	kfree(mappings);
+	job_data->used_mappings = NULL;
+
+done:
+	kvfree(bufs); /* the userspace copy is released on success and failure alike */
+
+	return err;
+}
+
+/*
+ * Attach the syncpoint named in the submit arguments to @job.
+ *
+ * The syncpoint is looked up by ID in @syncpoints; an unknown ID or any
+ * non-zero flag is rejected with -EINVAL. Returns 0 on success.
+ */
+static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_job *job,
+			     struct xarray *syncpoints, struct drm_tegra_channel_submit *args)
+{
+	struct host1x_syncpt *syncpt;
+
+	if (args->syncpt.flags != 0) {
+		SUBMIT_ERR(context, "invalid flag specified for syncpt");
+		return -EINVAL;
+	}
+
+	syncpt = xa_load(syncpoints, args->syncpt.id);
+	if (!syncpt) {
+		SUBMIT_ERR(context, "syncpoint specified in syncpt was not allocated");
+		return -EINVAL;
+	}
+
+	/* The reference taken here will be dropped on job release. */
+	job->syncpt_incrs = args->syncpt.increments;
+	job->syncpt = host1x_syncpt_get(syncpt);
+
+	return 0;
+}
+
+/*
+ * Validate one GATHER_UPTR command and append it to @job.
+ *
+ * @offset is the running word offset into the gather data. The command
+ * consumes cmd->words words starting there, and on success *offset is advanced
+ * past them. The words are run through tegra_drm_fw_validate() before the
+ * gather is added.
+ *
+ * Returns 0 on success or -EINVAL if any check fails.
+ */
+static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context,
+				 struct drm_tegra_submit_cmd_gather_uptr *cmd,
+				 struct gather_bo *bo, u32 *offset,
+				 struct tegra_drm_submit_data *job_data,
+				 u32 *class)
+{
+	u32 start = *offset;
+	u32 end;
+
+	if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) {
+		SUBMIT_ERR(context, "non-zero reserved field in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	/* Check for maximum gather size */
+	if (cmd->words > 16383) {
+		SUBMIT_ERR(context, "too many words in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	if (check_add_overflow(start, cmd->words, &end)) {
+		SUBMIT_ERR(context, "too many total words in job");
+		return -EINVAL;
+	}
+
+	/* The gather must lie entirely inside the data copied from userspace. */
+	if (end > bo->gather_data_words) {
+		SUBMIT_ERR(context, "GATHER_UPTR command overflows gather data");
+		return -EINVAL;
+	}
+
+	if (tegra_drm_fw_validate(context->client, bo->gather_data, start,
+				  cmd->words, job_data, class)) {
+		SUBMIT_ERR(context, "job was rejected by firewall");
+		return -EINVAL;
+	}
+
+	/* host1x takes the offset in bytes; gather words are 4 bytes each. */
+	host1x_job_add_gather(job, &bo->base, cmd->words, start * 4);
+
+	*offset = end;
+
+	return 0;
+}
+
+/*
+ * Build a host1x job from the command list of a submission.
+ *
+ * Copies the cmds array from userspace, allocates a job on the context's
+ * channel, attaches the job syncpoint and then translates each command into a
+ * gather or a syncpoint wait. A job without any gather words is rejected.
+ *
+ * Returns the new job, or an ERR_PTR() on failure.
+ */
+static struct host1x_job *
+submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo,
+		  struct drm_tegra_channel_submit *args, struct tegra_drm_submit_data *job_data,
+		  struct xarray *syncpoints)
+{
+	struct drm_tegra_submit_cmd *cmds;
+	struct host1x_job *job;
+	u32 gather_offset = 0;
+	u32 class;
+	u32 n;
+	int err;
+
+	/* Set initial class for firewall. */
+	class = context->client->base.class;
+
+	cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr), args->num_cmds,
+				     sizeof(*cmds));
+	if (IS_ERR(cmds)) {
+		SUBMIT_ERR(context, "failed to copy cmds array from userspace");
+		return ERR_CAST(cmds);
+	}
+
+	job = host1x_job_alloc(context->channel, args->num_cmds, 0, true);
+	if (!job) {
+		SUBMIT_ERR(context, "failed to allocate memory for job");
+		job = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	err = submit_get_syncpt(context, job, syncpoints, args);
+	if (err < 0)
+		goto free_job;
+
+	job->client = &context->client->base;
+	job->class = context->client->base.class;
+	job->serialize = true;
+
+	for (n = 0; n < args->num_cmds; n++) {
+		struct drm_tegra_submit_cmd *cmd = cmds + n;
+		bool relative;
+
+		if (cmd->flags) {
+			SUBMIT_ERR(context, "unknown flags given for cmd");
+			err = -EINVAL;
+			goto free_job;
+		}
+
+		switch (cmd->type) {
+		case DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR:
+			err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo,
+						    &gather_offset, job_data, &class);
+			if (err)
+				goto free_job;
+			break;
+
+		case DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT:
+		case DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE:
+			relative = cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE;
+
+			if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(context, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			/* Relative waits are only accepted on the job's own syncpoint. */
+			if (relative && cmd->wait_syncpt.id != args->syncpt.id) {
+				SUBMIT_ERR(context, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value,
+					    relative, class);
+			break;
+
+		default:
+			SUBMIT_ERR(context, "unknown cmd type");
+			err = -EINVAL;
+			goto free_job;
+		}
+	}
+
+	if (gather_offset == 0) {
+		SUBMIT_ERR(context, "job must have at least one gather");
+		err = -EINVAL;
+		goto free_job;
+	}
+
+	goto done;
+
+free_job:
+	host1x_job_put(job);
+	job = ERR_PTR(err);
+
+done:
+	kvfree(cmds);
+
+	return job;
+}
+
+/*
+ * Job release callback: drops the mapping references recorded in the job's
+ * submit data, frees that data and releases the runtime PM reference on the
+ * client device (when runtime PM is enabled for it).
+ */
+static void release_job(struct host1x_job *job)
+{
+	struct tegra_drm_client *client = container_of(job->client, struct tegra_drm_client, base);
+	struct tegra_drm_submit_data *data = job->user_data;
+	struct device *dev = client->base.dev;
+	u32 n;
+
+	for (n = 0; n < data->num_used_mappings; n++)
+		tegra_drm_mapping_put(data->used_mappings[n].mapping);
+
+	kfree(data->used_mappings);
+	kfree(data);
+
+	if (pm_runtime_enabled(dev))
+		pm_runtime_put_autosuspend(dev);
+}
+
+/*
+ * CHANNEL_SUBMIT ioctl: submit a job on a previously opened channel context.
+ *
+ * Steps, all performed with fpriv->lock held:
+ *  - optionally wait (up to 10 seconds) for the fence in syncobj_in,
+ *  - copy the gather data and patch relocations for the referenced buffers,
+ *  - build, pin and submit the host1x job,
+ *  - return the job's end syncpoint value in args->syncpt.value and, if
+ *    syncobj_out was given, install a postfence into it.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+				   struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_submit *args = data;
+	struct tegra_drm_submit_data *job_data;
+	struct drm_syncobj *syncobj = NULL;
+	struct tegra_drm_context *context;
+	struct host1x_job *job;
+	struct gather_bo *bo;
+	u32 i;
+	int err;
+
+	mutex_lock(&fpriv->lock);
+
+	context = xa_load(&fpriv->contexts, args->context);
+	if (!context) {
+		mutex_unlock(&fpriv->lock);
+		pr_err_ratelimited("%s: %s: invalid channel context '%#x'", __func__,
+				   current->comm, args->context);
+		return -EINVAL;
+	}
+
+	if (args->syncobj_in) {
+		struct dma_fence *fence;
+		long remaining;
+
+		err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence);
+		if (err) {
+			SUBMIT_ERR(context, "invalid syncobj_in '%#x'", args->syncobj_in);
+			goto unlock;
+		}
+
+		/*
+		 * dma_fence_wait_timeout() returns the remaining timeout (> 0)
+		 * on success, 0 if the wait timed out and a negative error code
+		 * (e.g. -ERESTARTSYS) otherwise. Only the latter two are
+		 * failures, and the positive value must never leak out as the
+		 * ioctl return code.
+		 */
+		remaining = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000));
+		dma_fence_put(fence);
+		if (remaining == 0) {
+			SUBMIT_ERR(context, "wait for syncobj_in timed out");
+			err = -ETIMEDOUT;
+			goto unlock;
+		}
+
+		if (remaining < 0) {
+			/* Typically an interrupted wait; just propagate the error. */
+			err = remaining;
+			goto unlock;
+		}
+	}
+
+	if (args->syncobj_out) {
+		syncobj = drm_syncobj_find(file, args->syncobj_out);
+		if (!syncobj) {
+			SUBMIT_ERR(context, "invalid syncobj_out '%#x'", args->syncobj_out);
+			err = -ENOENT;
+			goto unlock;
+		}
+	}
+
+	/* Allocate gather BO and copy gather words in. */
+	err = submit_copy_gather_data(&bo, drm->dev, context, args);
+	if (err)
+		goto unlock;
+
+	job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
+	if (!job_data) {
+		SUBMIT_ERR(context, "failed to allocate memory for job data");
+		err = -ENOMEM;
+		goto put_bo;
+	}
+
+	/* Get data buffer mappings and do relocation patching. */
+	err = submit_process_bufs(context, bo, args, job_data);
+	if (err)
+		goto free_job_data;
+
+	/* Allocate host1x_job and add gathers and waits to it. */
+	job = submit_create_job(context, bo, args, job_data, &fpriv->syncpoints);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto free_job_data;
+	}
+
+	/* Map gather data for Host1x. */
+	err = host1x_job_pin(job, context->client->base.dev);
+	if (err) {
+		SUBMIT_ERR(context, "failed to pin job: %d", err);
+		goto put_job;
+	}
+
+	/* Boot engine. */
+	if (pm_runtime_enabled(context->client->base.dev)) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+		err = pm_runtime_resume_and_get(context->client->base.dev);
+		if (err < 0) {
+#else
+		err = pm_runtime_get_sync(context->client->base.dev);
+		if (err < 0) {
+			pm_runtime_put_noidle(context->client->base.dev);
+#endif
+			SUBMIT_ERR(context, "could not power up engine: %d", err);
+			goto unpin_job;
+		}
+	}
+
+	job->user_data = job_data;
+	job->release = release_job;
+	job->timeout = 10000;
+
+	/*
+	 * job_data is now part of job reference counting, so don't release
+	 * it from here.
+	 */
+	job_data = NULL;
+
+	/* Submit job to hardware. */
+	err = host1x_job_submit(job);
+	if (err) {
+		SUBMIT_ERR(context, "host1x job submission failed: %d", err);
+		goto unpin_job;
+	}
+
+	/* Return postfences to userspace and add fences to DMA reservations. */
+	args->syncpt.value = job->syncpt_end;
+
+	if (syncobj) {
+		struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end);
+
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			SUBMIT_ERR(context, "failed to create postfence: %d", err);
+		} else {
+			/*
+			 * Only install a real fence; an ERR_PTR() must not be
+			 * handed to the syncobj.
+			 *
+			 * NOTE(review): the reference returned by
+			 * host1x_fence_create() is not dropped here - confirm
+			 * whether a dma_fence_put() is needed after the
+			 * syncobj has taken its own reference.
+			 */
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
+	}
+
+	goto put_job;
+
+unpin_job:
+	host1x_job_unpin(job);
+put_job:
+	host1x_job_put(job);
+free_job_data:
+	/* job_data is NULL once ownership has moved to the job. */
+	if (job_data) {
+		if (job_data->used_mappings) {
+			for (i = 0; i < job_data->num_used_mappings; i++)
+				tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+
+			kfree(job_data->used_mappings);
+		}
+
+		kfree(job_data);
+	}
+put_bo:
+	gather_bo_put(&bo->base);
+unlock:
+	if (syncobj)
+		drm_syncobj_put(syncobj);
+
+	mutex_unlock(&fpriv->lock);
+	return err;
+}
--- a/drivers/gpu/drm/tegra/uapi/submit.h
+++ b/drivers/gpu/drm/tegra/uapi/submit.h
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/host1x-next.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+
+#include "drm.h"
+#include "uapi.h"
+
+/*
+ * kref release callback for a mapping: unpins the buffer object, drops the
+ * reference held on it and frees the mapping itself.
+ */
+static void tegra_drm_mapping_release(struct kref *ref)
+{
+	struct tegra_drm_mapping *mapping;
+
+	mapping = container_of(ref, struct tegra_drm_mapping, ref);
+
+	host1x_bo_unpin(mapping->map);
+	host1x_bo_put(mapping->bo);
+	kfree(mapping);
+}
+
+/*
+ * Drop one reference to @mapping. tegra_drm_mapping_release() is invoked when
+ * the last reference goes away.
+ */
+void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping)
+{
+	kref_put(&mapping->ref, tegra_drm_mapping_release);
+}
+
+/*
+ * Tear down a channel context: drop the reference held on every mapping still
+ * registered in it, destroy the mappings xarray, release the channel and free
+ * the context.
+ */
+static void tegra_drm_channel_context_close(struct tegra_drm_context *context)
+{
+	struct tegra_drm_mapping *entry;
+	unsigned long index;
+
+	xa_for_each(&context->mappings, index, entry) {
+		tegra_drm_mapping_put(entry);
+	}
+	xa_destroy(&context->mappings);
+
+	host1x_channel_put(context->channel);
+	kfree(context);
+}
+
+/*
+ * Release all new-UAPI state attached to @file: close every channel context
+ * and drop the reference on every syncpoint still registered, then destroy
+ * both xarrays.
+ */
+void tegra_drm_uapi_close_file(struct tegra_drm_file *file)
+{
+	struct tegra_drm_context *ctx;
+	struct host1x_syncpt *syncpt;
+	unsigned long index;
+
+	xa_for_each(&file->contexts, index, ctx) {
+		tegra_drm_channel_context_close(ctx);
+	}
+
+	xa_for_each(&file->syncpoints, index, syncpt) {
+		host1x_syncpt_put(syncpt);
+	}
+
+	xa_destroy(&file->contexts);
+	xa_destroy(&file->syncpoints);
+}
+
+/*
+ * Return the first client on tegra->clients whose host1x class equals @class,
+ * or NULL if there is none.
+ */
+static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, u32 class)
+{
+	struct tegra_drm_client *pos;
+
+	list_for_each_entry(pos, &tegra->clients, list) {
+		if (pos->base.class != class)
+			continue;
+
+		return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * CHANNEL_OPEN ioctl: create a channel context for the engine with the
+ * requested host1x class.
+ *
+ * On success the new context ID is returned in args->context together with
+ * the client's version and capability flags.
+ *
+ * Returns 0 on success, -EINVAL for unknown flags, -ENOMEM on allocation
+ * failure, -ENODEV if no client has the requested class, -EBUSY if no channel
+ * could be obtained, or the error from xa_alloc().
+ */
+int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm *tegra = drm->dev_private;
+	struct drm_tegra_channel_open *args = data;
+	struct tegra_drm_client *client = NULL;
+	struct tegra_drm_context *context;
+	int err;
+
+	if (args->flags)
+		return -EINVAL;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	client = tegra_drm_find_client(tegra, args->host1x_class);
+	if (!client) {
+		err = -ENODEV;
+		goto free;
+	}
+
+	if (client->shared_channel) {
+		context->channel = host1x_channel_get(client->shared_channel);
+	} else {
+		context->channel = host1x_channel_request(&client->base);
+		if (!context->channel) {
+			err = -EBUSY;
+			goto free;
+		}
+	}
+
+	/*
+	 * Fully initialise the context before publishing it. This function
+	 * does not take fpriv->lock, so as soon as xa_alloc() succeeds other
+	 * ioctls on the same file can look the context up and would otherwise
+	 * see a NULL client and an uninitialised mappings xarray.
+	 */
+	context->client = client;
+	xa_init_flags(&context->mappings, XA_FLAGS_ALLOC1);
+
+	err = xa_alloc(&fpriv->contexts, &args->context, context, XA_LIMIT(1, U32_MAX),
+		       GFP_KERNEL);
+	if (err < 0)
+		goto put_channel;
+
+	args->version = client->version;
+	args->capabilities = 0;
+
+	if (device_get_dma_attr(client->base.dev) == DEV_DMA_COHERENT)
+		args->capabilities |= DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT;
+
+	return 0;
+
+put_channel:
+	/* The mappings xarray is still empty here, so nothing to destroy. */
+	host1x_channel_put(context->channel);
+free:
+	kfree(context);
+
+	return err;
+}
+
+/*
+ * CHANNEL_CLOSE ioctl: remove the context named by args->context from the
+ * file and tear it down. Returns -EINVAL if no such context exists.
+ */
+int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_channel_close *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm_context *ctx;
+
+	/* Unpublish the context under the lock; the teardown happens outside it. */
+	mutex_lock(&fpriv->lock);
+	ctx = xa_erase(&fpriv->contexts, args->context);
+	mutex_unlock(&fpriv->lock);
+
+	if (!ctx)
+		return -EINVAL;
+
+	tegra_drm_channel_context_close(ctx);
+
+	return 0;
+}
+
+/*
+ * CHANNEL_MAP ioctl: pin the GEM object named by args->handle for the engine
+ * behind the given channel context and register the resulting mapping.
+ *
+ * The DMA direction is derived from the READ/WRITE bits in args->flags; at
+ * least one of them must be set. On success the mapping ID is returned in
+ * args->mapping.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_map *args = data;
+	enum dma_data_direction dir;
+	struct tegra_drm_context *ctx;
+	struct tegra_drm_mapping *m;
+	int ret = 0;
+
+	if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE)
+		return -EINVAL;
+
+	mutex_lock(&fpriv->lock);
+
+	ctx = xa_load(&fpriv->contexts, args->context);
+	if (!ctx) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	m = kzalloc(sizeof(*m), GFP_KERNEL);
+	if (!m) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	kref_init(&m->ref);
+
+	m->bo = tegra_gem_lookup(file, args->handle);
+	if (!m->bo) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	/* Translate the requested access into a DMA direction. */
+	if ((args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) ==
+	    DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
+		dir = DMA_BIDIRECTIONAL;
+	} else if ((args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) ==
+		   DRM_TEGRA_CHANNEL_MAP_WRITE) {
+		dir = DMA_FROM_DEVICE;
+	} else if ((args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) ==
+		   DRM_TEGRA_CHANNEL_MAP_READ) {
+		dir = DMA_TO_DEVICE;
+	} else {
+		ret = -EINVAL;
+		goto out_put_bo;
+	}
+
+	m->map = host1x_bo_pin(ctx->client->base.dev, m->bo, dir, NULL);
+	if (IS_ERR(m->map)) {
+		ret = PTR_ERR(m->map);
+		goto out_put_bo;
+	}
+
+	m->iova = m->map->phys;
+	m->iova_end = m->iova + host1x_to_tegra_bo(m->bo)->gem.size;
+
+	ret = xa_alloc(&ctx->mappings, &args->mapping, m, XA_LIMIT(1, U32_MAX),
+		       GFP_KERNEL);
+	if (ret < 0)
+		goto out_unpin;
+
+	mutex_unlock(&fpriv->lock);
+
+	return 0;
+
+out_unpin:
+	host1x_bo_unpin(m->map);
+out_put_bo:
+	host1x_bo_put(m->bo);
+out_free:
+	kfree(m);
+out_unlock:
+	mutex_unlock(&fpriv->lock);
+	return ret;
+}
+
+/*
+ * CHANNEL_UNMAP ioctl: remove the mapping named by args->mapping from the
+ * given channel context and drop the context's reference on it.
+ *
+ * Returns 0 on success, or -EINVAL if the context or the mapping is unknown.
+ */
+int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_channel_unmap *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm_mapping *removed = NULL;
+	struct tegra_drm_context *ctx;
+
+	mutex_lock(&fpriv->lock);
+
+	ctx = xa_load(&fpriv->contexts, args->context);
+	if (ctx)
+		removed = xa_erase(&ctx->mappings, args->mapping);
+
+	mutex_unlock(&fpriv->lock);
+
+	/* Either the context or the mapping did not exist. */
+	if (!removed)
+		return -EINVAL;
+
+	tegra_drm_mapping_put(removed);
+
+	return 0;
+}
+
+/*
+ * SYNCPOINT_ALLOCATE ioctl: allocate a client-managed syncpoint named after
+ * the calling task and register it with the file. args->id must be zero on
+ * entry and receives the syncpoint ID.
+ *
+ * Returns 0 on success, -EINVAL if args->id is non-zero, -EBUSY if no
+ * syncpoint could be allocated, or the error from xa_insert().
+ */
+int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_syncpoint_allocate *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct host1x *host = tegra_drm_to_host1x(drm->dev_private);
+	struct host1x_syncpt *syncpt;
+	int ret;
+
+	if (args->id != 0)
+		return -EINVAL;
+
+	syncpt = host1x_syncpt_alloc(host, HOST1X_SYNCPT_CLIENT_MANAGED, current->comm);
+	if (!syncpt)
+		return -EBUSY;
+
+	args->id = host1x_syncpt_id(syncpt);
+
+	ret = xa_insert(&fpriv->syncpoints, args->id, syncpt, GFP_KERNEL);
+	if (!ret)
+		return 0;
+
+	/* Registration failed: give the syncpoint back. */
+	host1x_syncpt_put(syncpt);
+
+	return ret;
+}
+
+/*
+ * SYNCPOINT_FREE ioctl: unregister the syncpoint named by args->id from the
+ * file and drop the file's reference on it. Returns -EINVAL if the file has
+ * no such syncpoint.
+ */
+int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_syncpoint_allocate *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct host1x_syncpt *syncpt;
+
+	mutex_lock(&fpriv->lock);
+	syncpt = xa_erase(&fpriv->syncpoints, args->id);
+	mutex_unlock(&fpriv->lock);
+
+	if (syncpt) {
+		host1x_syncpt_put(syncpt);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * SYNCPOINT_WAIT ioctl: wait until the syncpoint named by args->id reaches
+ * args->threshold or the absolute timeout args->timeout_ns expires. The
+ * syncpoint value is passed back through args->value.
+ *
+ * Returns -EINVAL for non-zero padding or an unknown syncpoint ID, otherwise
+ * the result of host1x_syncpt_wait().
+ */
+int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_syncpoint_wait *args = data;
+	struct host1x *host = tegra_drm_to_host1x(drm->dev_private);
+	struct host1x_syncpt *syncpt;
+	signed long timeout;
+
+	if (args->padding)
+		return -EINVAL;
+
+	/* No reference is taken on the syncpoint for the duration of the wait. */
+	syncpt = host1x_syncpt_get_by_id_noref(host, args->id);
+	if (!syncpt)
+		return -EINVAL;
+
+	timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+	return host1x_syncpt_wait(syncpt, args->threshold, timeout, &args->value);
+}
--- a/drivers/gpu/drm/tegra/uapi.h
+++ b/drivers/gpu/drm/tegra/uapi.h
@@ -21,12 +21,7 @@ struct tegra_drm_file {

 	/* New UAPI state */
 	struct xarray contexts;
-};
-
-struct tegra_drm_channel_ctx {
-	struct tegra_drm_client *client;
-	struct host1x_channel *channel;
-	struct xarray mappings;
+	struct xarray syncpoints;
 };

 struct tegra_drm_mapping {
@@ -46,17 +41,17 @@ int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data,
 int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data,
 				struct drm_file *file);
 int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data,
-				struct drm_file *file);
+				  struct drm_file *file);
 int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
 				   struct drm_file *file);
-int tegra_drm_ioctl_gem_create(struct drm_device *drm, void *data,
-				struct drm_file *file);
-int tegra_drm_ioctl_gem_mmap(struct drm_device *drm, void *data,
-				struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data,
+				       struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data,
+				   struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data,
+				   struct drm_file *file);

 void tegra_drm_uapi_close_file(struct tegra_drm_file *file);
 void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping);
-struct tegra_drm_channel_ctx *
-tegra_drm_channel_ctx_lock(struct tegra_drm_file *file, u32 id);

 #endif
--- a/drivers/gpu/drm/tegra/uapi/gather_bo.c
+++ b/drivers/gpu/drm/tegra/uapi/gather_bo.c
@@ -1,111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2020 NVIDIA Corporation */
-
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-
-#include "gather_bo.h"
-
-static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
-{
-	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
-
-	kref_get(&bo->ref);
-
-	return host_bo;
-}
-
-static void gather_bo_release(struct kref *ref)
-{
-	struct gather_bo *bo = container_of(ref, struct gather_bo, ref);
-
-	dma_free_attrs(bo->dev, bo->gather_data_words * 4, bo->gather_data, bo->gather_data_dma,
-		       0);
-	kfree(bo);
-}
-
-void gather_bo_put(struct host1x_bo *host_bo)
-{
-	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
-
-	kref_put(&bo->ref, gather_bo_release);
-}
-
-static struct host1x_bo_mapping *
-gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
-{
-	struct gather_bo *gather = container_of(bo, struct gather_bo, base);
-	struct host1x_bo_mapping *map;
-	int err;
-
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
-	if (!map)
-		return ERR_PTR(-ENOMEM);
-
-	kref_init(&map->ref);
-	map->bo = host1x_bo_get(bo);
-	map->direction = direction;
-	map->dev = dev;
-
-	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
-	if (!map->sgt) {
-		err = -ENOMEM;
-		goto free;
-	}
-
-	err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma,
-			      gather->gather_data_words * 4);
-	if (err)
-		goto free_sgt;
-
-	err = dma_map_sgtable(dev, map->sgt, direction, 0);
-	if (err)
-		goto free_sgt;
-
-	map->phys = sg_dma_address(map->sgt->sgl);
-	map->size = gather->gather_data_words * 4;
-	map->chunks = err;
-
-	return map;
-
-free_sgt:
-	sg_free_table(map->sgt);
-	kfree(map->sgt);
-free:
-	kfree(map);
-	return ERR_PTR(err);
-}
-
-static void gather_bo_unpin(struct host1x_bo_mapping *map)
-{
-	if (!map)
-		return;
-
-	dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
-	sg_free_table(map->sgt);
-	kfree(map->sgt);
-	host1x_bo_put(map->bo);
-
-	kfree(map);
-}
-
-static void *gather_bo_mmap(struct host1x_bo *host_bo)
-{
-	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
-
-	return bo->gather_data;
-}
-
-static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
-{
-}
-
-const struct host1x_bo_ops gather_bo_ops = {
-	.get = gather_bo_get,
-	.put = gather_bo_put,
-	.pin = gather_bo_pin,
-	.unpin = gather_bo_unpin,
-	.mmap = gather_bo_mmap,
-	.munmap = gather_bo_munmap,
-};
--- a/drivers/gpu/drm/tegra/uapi/gather_bo.h
+++ b/drivers/gpu/drm/tegra/uapi/gather_bo.h
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright (c) 2020 NVIDIA Corporation */
-
-#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H
-#define _TEGRA_DRM_SUBMIT_GATHER_BO_H
-
-#include <linux/host1x-next.h>
-#include <linux/kref.h>
-
-struct gather_bo {
-	struct host1x_bo base;
-
-	struct kref ref;
-
-	struct device *dev;
-	u32 *gather_data;
-	dma_addr_t gather_data_dma;
-	size_t gather_data_words;
-};
-
-extern const struct host1x_bo_ops gather_bo_ops;
-void gather_bo_put(struct host1x_bo *host_bo);
-
-#endif
--- a/drivers/gpu/drm/tegra/uapi/submit.c
+++ b/drivers/gpu/drm/tegra/uapi/submit.c
@@ -1,441 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2020 NVIDIA Corporation */
-
-#include <linux/dma-fence-array.h>
-#include <linux/file.h>
-#include <linux/host1x-next.h>
-#include <linux/iommu.h>
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/nospec.h>
-#include <linux/pm_runtime.h>
-#include <linux/sync_file.h>
-
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-
-#include "../uapi.h"
-#include "../drm.h"
-#include "../gem.h"
-
-#include "gather_bo.h"
-#include "submit.h"
-
-static struct tegra_drm_mapping *
-tegra_drm_mapping_get(struct tegra_drm_channel_ctx *ctx, u32 id)
-{
-	struct tegra_drm_mapping *mapping;
-
-	xa_lock(&ctx->mappings);
-	mapping = xa_load(&ctx->mappings, id);
-	if (mapping)
-		kref_get(&mapping->ref);
-	xa_unlock(&ctx->mappings);
-
-	return mapping;
-}
-
-static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
-{
-	unsigned long copy_err;
-	size_t copy_len;
-	void *data;
-
-	if (check_mul_overflow(count, size, &copy_len))
-		return ERR_PTR(-EINVAL);
-
-	if (copy_len > 0x4000)
-		return ERR_PTR(-E2BIG);
-
-	data = kvmalloc(copy_len, GFP_KERNEL);
-	if (!data)
-		return ERR_PTR(-ENOMEM);
-
-	copy_err = copy_from_user(data, from, copy_len);
-	if (copy_err) {
-		kvfree(data);
-		return ERR_PTR(-EFAULT);
-	}
-
-	return data;
-}
-
-static int submit_copy_gather_data(struct drm_device *drm,
-				   struct gather_bo **pbo,
-				   struct drm_tegra_channel_submit *args)
-{
-	unsigned long copy_err;
-	struct gather_bo *bo;
-	size_t copy_len;
-
-	if (args->gather_data_words == 0) {
-		drm_info(drm, "gather_data_words can't be 0");
-		return -EINVAL;
-	}
-
-	if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len))
-		return -EINVAL;
-
-	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-	if (!bo)
-		return -ENOMEM;
-
-	kref_init(&bo->ref);
-	host1x_bo_init(&bo->base, &gather_bo_ops);
-	bo->dev = drm->dev;
-
-	bo->gather_data = dma_alloc_attrs(bo->dev, copy_len, &bo->gather_data_dma,
-					  GFP_KERNEL | __GFP_NOWARN, 0);
-	if (!bo->gather_data) {
-		kfree(bo);
-		return -ENOMEM;
-	}
-
-	copy_err = copy_from_user(bo->gather_data,
-				  u64_to_user_ptr(args->gather_data_ptr),
-				  copy_len);
-	if (copy_err) {
-		dma_free_attrs(drm->dev, copy_len, bo->gather_data, bo->gather_data_dma, 0);
-		kfree(bo->gather_data);
-		kfree(bo);
-		return -EFAULT;
-	}
-
-	bo->gather_data_words = args->gather_data_words;
-
-	*pbo = bo;
-
-	return 0;
-}
-
-static int submit_write_reloc(struct gather_bo *bo,
-			      struct drm_tegra_submit_buf *buf,
-			      struct tegra_drm_mapping *mapping)
-{
-	/* TODO check that target_offset is within bounds */
-	dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
-	u32 written_ptr;
-
-#ifdef CONFIG_ARM64
-	if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
-		iova |= BIT(39);
-#endif
-
-	written_ptr = (u32)(iova >> buf->reloc.shift);
-
-	if (buf->reloc.gather_offset_words >= bo->gather_data_words)
-		return -EINVAL;
-
-	buf->reloc.gather_offset_words = array_index_nospec(
-		buf->reloc.gather_offset_words, bo->gather_data_words);
-
-	bo->gather_data[buf->reloc.gather_offset_words] = written_ptr;
-
-	return 0;
-}
-
-static int submit_process_bufs(struct drm_device *drm, struct gather_bo *bo,
-			       struct tegra_drm_submit_data *job_data,
-			       struct tegra_drm_channel_ctx *ctx,
-			       struct drm_tegra_channel_submit *args)
-{
-	struct tegra_drm_used_mapping *mappings;
-	struct drm_tegra_submit_buf *bufs;
-	int err;
-	u32 i;
-
-	bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr),
-				     args->num_bufs, sizeof(*bufs));
-	if (IS_ERR(bufs))
-		return PTR_ERR(bufs);
-
-	mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL);
-	if (!mappings) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	for (i = 0; i < args->num_bufs; i++) {
-		struct drm_tegra_submit_buf *buf = &bufs[i];
-		struct tegra_drm_mapping *mapping;
-
-		if (buf->flags & ~DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR) {
-			err = -EINVAL;
-			goto drop_refs;
-		}
-
-		mapping = tegra_drm_mapping_get(ctx, buf->mapping_id);
-		if (!mapping) {
-			drm_info(drm, "invalid mapping_id for buf: %u",
-				 buf->mapping_id);
-			err = -EINVAL;
-			goto drop_refs;
-		}
-
-		err = submit_write_reloc(bo, buf, mapping);
-		if (err) {
-			tegra_drm_mapping_put(mapping);
-			goto drop_refs;
-		}
-
-		mappings[i].mapping = mapping;
-		mappings[i].flags = buf->flags;
-	}
-
-	job_data->used_mappings = mappings;
-	job_data->num_used_mappings = i;
-
-	err = 0;
-
-	goto done;
-
-drop_refs:
-	for (;;) {
-		if (i-- == 0)
-			break;
-
-		tegra_drm_mapping_put(mappings[i].mapping);
-	}
-
-	kfree(mappings);
-	job_data->used_mappings = NULL;
-
-done:
-	kvfree(bufs);
-
-	return err;
-}
-
-static int submit_get_syncpt(struct drm_device *drm, struct host1x_job *job,
-			     struct drm_tegra_channel_submit *args)
-{
-	struct host1x_syncpt *sp;
-
-	if (args->syncpt_incr.flags)
-		return -EINVAL;
-
-	/* Syncpt ref will be dropped on job release */
-	sp = host1x_syncpt_fd_get(args->syncpt_incr.syncpt_fd);
-	if (IS_ERR(sp))
-		return PTR_ERR(sp);
-
-	job->syncpt = sp;
-	job->syncpt_incrs = args->syncpt_incr.num_incrs;
-
-	return 0;
-}
-
-static int submit_job_add_gather(struct host1x_job *job,
-				 struct tegra_drm_channel_ctx *ctx,
-				 struct drm_tegra_submit_cmd_gather_uptr *cmd,
-				 struct gather_bo *bo, u32 *offset,
-				 struct tegra_drm_submit_data *job_data,
-				 u32 *class)
-{
-	u32 next_offset;
-
-	if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2])
-		return -EINVAL;
-
-	/* Check for maximum gather size */
-	if (cmd->words > 16383)
-		return -EINVAL;
-
-	if (check_add_overflow(*offset, cmd->words, &next_offset))
-		return -EINVAL;
-
-	if (next_offset > bo->gather_data_words)
-		return -EINVAL;
-
-	if (tegra_drm_fw_validate(ctx->client, bo->gather_data, *offset,
-				  cmd->words, job_data, class))
-		return -EINVAL;
-
-	host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4);
-
-	*offset = next_offset;
-
-	return 0;
-}
-
-static int submit_create_job(struct drm_device *drm, struct host1x_job **pjob,
-			     struct gather_bo *bo,
-			     struct tegra_drm_channel_ctx *ctx,
-			     struct drm_tegra_channel_submit *args,
-			     struct tegra_drm_submit_data *job_data)
-{
-	struct drm_tegra_submit_cmd *cmds;
-	u32 i, gather_offset = 0, class;
-	struct host1x_job *job;
-	int err;
-
-	/* Set initial class for firewall. */
-	class = ctx->client->base.class;
-
-	cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr),
-				     args->num_cmds, sizeof(*cmds));
-	if (IS_ERR(cmds))
-		return PTR_ERR(cmds);
-
-	job = host1x_job_alloc(ctx->channel, args->num_cmds, 0);
-	if (!job) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	err = submit_get_syncpt(drm, job, args);
-	if (err < 0)
-		goto free_job;
-
-	job->client = &ctx->client->base;
-	job->class = ctx->client->base.class;
-	job->serialize = true;
-
-	for (i = 0; i < args->num_cmds; i++) {
-		struct drm_tegra_submit_cmd *cmd = &cmds[i];
-
-		if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) {
-			err = submit_job_add_gather(job, ctx, &cmd->gather_uptr,
-						    bo, &gather_offset,
-						    job_data, &class);
-			if (err)
-				goto free_job;
-		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) {
-			if (cmd->wait_syncpt.reserved[0] ||
-			    cmd->wait_syncpt.reserved[1]) {
-				err = -EINVAL;
-				goto free_job;
-			}
-
-			host1x_job_add_wait(job, cmd->wait_syncpt.id,
-					    cmd->wait_syncpt.threshold);
-		} else {
-			err = -EINVAL;
-			goto free_job;
-		}
-	}
-
-	if (gather_offset == 0) {
-		drm_info(drm, "Job must have at least one gather");
-		err = -EINVAL;
-		goto free_job;
-	}
-
-	*pjob = job;
-
-	err = 0;
-	goto done;
-
-free_job:
-	host1x_job_put(job);
-
-done:
-	kvfree(cmds);
-
-	return err;
-}
-
-static void release_job(struct host1x_job *job)
-{
-	struct tegra_drm_client *client =
-		container_of(job->client, struct tegra_drm_client, base);
-	struct tegra_drm_submit_data *job_data = job->user_data;
-	u32 i;
-
-	for (i = 0; i < job_data->num_used_mappings; i++)
-		tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
-
-	kfree(job_data->used_mappings);
-	kfree(job_data);
-
-	pm_runtime_put_autosuspend(client->base.dev);
-}
-
-int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
-				   struct drm_file *file)
-{
-	struct tegra_drm_file *fpriv = file->driver_priv;
-	struct drm_tegra_channel_submit *args = data;
-	struct tegra_drm_submit_data *job_data;
-	struct tegra_drm_channel_ctx *ctx;
-	struct host1x_job *job;
-	struct gather_bo *bo;
-	u32 i;
-	int err;
-
-	ctx = tegra_drm_channel_ctx_lock(fpriv, args->channel_ctx);
-	if (!ctx)
-		return -EINVAL;
-
-	/* Allocate gather BO and copy gather words in. */
-	err = submit_copy_gather_data(drm, &bo, args);
-	if (err)
-		goto unlock;
-
-	job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
-	if (!job_data) {
-		err = -ENOMEM;
-		goto put_bo;
-	}
-
-	/* Get data buffer mappings and do relocation patching. */
-	err = submit_process_bufs(drm, bo, job_data, ctx, args);
-	if (err)
-		goto free_job_data;
-
-	/* Allocate host1x_job and add gathers and waits to it. */
-	err = submit_create_job(drm, &job, bo, ctx, args,
-				job_data);
-	if (err)
-		goto free_job_data;
-
-	/* Map gather data for Host1x. */
-	err = host1x_job_pin(job, ctx->client->base.dev);
-	if (err)
-		goto put_job;
-
-	/* Boot engine. */
-	err = pm_runtime_get_sync(ctx->client->base.dev);
-	if (err < 0)
-		goto put_pm_runtime;
-
-	job->user_data = job_data;
-	job->release = release_job;
-	job->timeout = 10000;
-
-	/*
-	 * job_data is now part of job reference counting, so don't release
-	 * it from here.
-	 */
-	job_data = NULL;
-
-	/* Submit job to hardware. */
-	err = host1x_job_submit(job);
-	if (err)
-		goto put_job;
-
-	/* Return postfences to userspace and add fences to DMA reservations. */
-	args->syncpt_incr.fence_value = job->syncpt_end;
-
-	goto put_job;
-
-put_pm_runtime:
-	if (!job->release)
-		pm_runtime_put(ctx->client->base.dev);
-	host1x_job_unpin(job);
-put_job:
-	host1x_job_put(job);
-free_job_data:
-	if (job_data && job_data->used_mappings) {
-		for (i = 0; i < job_data->num_used_mappings; i++)
-			tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
-		kfree(job_data->used_mappings);
-	}
-	if (job_data)
-		kfree(job_data);
-put_bo:
-	gather_bo_put(&bo->base);
-unlock:
-	mutex_unlock(&fpriv->lock);
-	return err;
-}
--- a/drivers/gpu/drm/tegra/uapi/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi/uapi.c
@@ -1,285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (c) 2020 NVIDIA Corporation */
-
-#include <linux/host1x-next.h>
-#include <linux/iommu.h>
-#include <linux/list.h>
-
-#include <drm/drm_drv.h>
-#include <drm/drm_file.h>
-
-#include "../uapi.h"
-#include "../drm.h"
-
-struct tegra_drm_channel_ctx *
-tegra_drm_channel_ctx_lock(struct tegra_drm_file *file, u32 id)
-{
-	struct tegra_drm_channel_ctx *ctx;
-
-	mutex_lock(&file->lock);
-	ctx = xa_load(&file->contexts, id);
-	if (!ctx)
-		mutex_unlock(&file->lock);
-
-	return ctx;
-}
-
-static void tegra_drm_mapping_release(struct kref *ref)
-{
-	struct tegra_drm_mapping *mapping =
-		container_of(ref, struct tegra_drm_mapping, ref);
-
-	host1x_bo_unpin(mapping->map);
-	host1x_bo_put(mapping->bo);
-
-	kfree(mapping);
-}
-
-void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping)
-{
-	kref_put(&mapping->ref, tegra_drm_mapping_release);
-}
-
-static void tegra_drm_channel_ctx_close(struct tegra_drm_channel_ctx *ctx)
-{
-	unsigned long mapping_id;
-	struct tegra_drm_mapping *mapping;
-
-	xa_for_each(&ctx->mappings, mapping_id, mapping)
-		tegra_drm_mapping_put(mapping);
-
-	xa_destroy(&ctx->mappings);
-
-	host1x_channel_put(ctx->channel);
-
-	kfree(ctx);
-}
-
-int close_channel_ctx(int id, void *p, void *data)
-{
-	struct tegra_drm_channel_ctx *ctx = p;
-
-	tegra_drm_channel_ctx_close(ctx);
-
-	return 0;
-}
-
-void tegra_drm_uapi_close_file(struct tegra_drm_file *file)
-{
-	unsigned long ctx_id;
-	struct tegra_drm_channel_ctx *ctx;
-
-	xa_for_each(&file->contexts, ctx_id, ctx)
-		tegra_drm_channel_ctx_close(ctx);
-
-	xa_destroy(&file->contexts);
-}
-
-int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data,
-				 struct drm_file *file)
-{
-	struct tegra_drm_file *fpriv = file->driver_priv;
-	struct tegra_drm *tegra = drm->dev_private;
-	struct drm_tegra_channel_open *args = data;
-	struct tegra_drm_client *client = NULL;
-	struct tegra_drm_channel_ctx *ctx;
-	int err;
-
-	if (args->flags)
-		return -EINVAL;
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-
-	err = -ENODEV;
-	list_for_each_entry(client, &tegra->clients, list) {
-		if (client->base.class == args->host1x_class) {
-			err = 0;
-			break;
-		}
-	}
-	if (err)
-		goto free_ctx;
-
-	if (client->shared_channel) {
-		ctx->channel = host1x_channel_get(client->shared_channel);
-	} else {
-		ctx->channel = host1x_channel_request(&client->base);
-		if (!ctx->channel) {
-			err = -EBUSY;
-			goto free_ctx;
-		}
-	}
-
-	err = xa_alloc(&fpriv->contexts, &args->channel_ctx, ctx,
-		       XA_LIMIT(1, U32_MAX), GFP_KERNEL);
-	if (err < 0)
-		goto put_channel;
-
-	ctx->client = client;
-	xa_init_flags(&ctx->mappings, XA_FLAGS_ALLOC1);
-
-	args->hardware_version = client->version;
-
-	return 0;
-
-put_channel:
-	host1x_channel_put(ctx->channel);
-free_ctx:
-	kfree(ctx);
-
-	return err;
-}
-
-int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data,
-				  struct drm_file *file)
-{
-	struct tegra_drm_file *fpriv = file->driver_priv;
-	struct drm_tegra_channel_close *args = data;
-	struct tegra_drm_channel_ctx *ctx;
-
-	ctx = tegra_drm_channel_ctx_lock(fpriv, args->channel_ctx);
-	if (!ctx)
-		return -EINVAL;
-
-	xa_erase(&fpriv->contexts, args->channel_ctx);
-
-	mutex_unlock(&fpriv->lock);
-
-	tegra_drm_channel_ctx_close(ctx);
-
-	return 0;
-}
-
-int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data,
-				struct drm_file *file)
-{
-	struct tegra_drm_file *fpriv = file->driver_priv;
-	struct drm_tegra_channel_map *args = data;
-	struct tegra_drm_channel_ctx *ctx;
-	struct tegra_drm_mapping *mapping;
-	enum dma_data_direction direction;
-	struct drm_gem_object *gem;
-	u32 mapping_id;
-	int err = 0;
-
-	if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READWRITE)
-		return -EINVAL;
-
-	ctx = tegra_drm_channel_ctx_lock(fpriv, args->channel_ctx);
-	if (!ctx)
-		return -EINVAL;
-
-	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
-	if (!mapping) {
-		err = -ENOMEM;
-		goto unlock;
-	}
-
-	kref_init(&mapping->ref);
-
-	gem = drm_gem_object_lookup(file, args->handle);
-	if (!gem) {
-		err = -EINVAL;
-		goto free;
-	}
-
-	mapping->bo = &container_of(gem, struct tegra_bo, gem)->base;
-
-	if (args->flags & DRM_TEGRA_CHANNEL_MAP_READWRITE)
-		direction = DMA_BIDIRECTIONAL;
-	else
-		direction = DMA_TO_DEVICE;
-
-	mapping->map = host1x_bo_pin(ctx->client->base.dev, mapping->bo, direction, NULL);
-	if (IS_ERR(mapping->map)) {
-		err = PTR_ERR(mapping->map);
-		goto put_gem;
-	}
-
-	mapping->iova = mapping->map->phys;
-	mapping->iova_end = mapping->iova + gem->size;
-
-	mutex_unlock(&fpriv->lock);
-
-	err = xa_alloc(&ctx->mappings, &mapping_id, mapping,
-		       XA_LIMIT(1, U32_MAX), GFP_KERNEL);
-	if (err < 0)
-		goto unpin;
-
-	args->mapping_id = mapping_id;
-
-	return 0;
-
-unpin:
-	host1x_bo_unpin(mapping->map);
-put_gem:
-	drm_gem_object_put(gem);
-free:
-	kfree(mapping);
-unlock:
-	mutex_unlock(&fpriv->lock);
-	return err;
-}
-
-int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data,
-				  struct drm_file *file)
-{
-	struct tegra_drm_file *fpriv = file->driver_priv;
-	struct drm_tegra_channel_unmap *args = data;
-	struct tegra_drm_channel_ctx *ctx;
-	struct tegra_drm_mapping *mapping;
-
-	ctx = tegra_drm_channel_ctx_lock(fpriv, args->channel_ctx);
-	if (!ctx)
-		return -EINVAL;
-
-	mapping = xa_erase(&ctx->mappings, args->mapping_id);
-
-	mutex_unlock(&fpriv->lock);
-
-	if (mapping) {
-		tegra_drm_mapping_put(mapping);
-		return 0;
-	} else {
-		return -EINVAL;
-	}
-}
-
-int tegra_drm_ioctl_gem_create(struct drm_device *drm, void *data,
-			       struct drm_file *file)
-{
-	struct drm_tegra_gem_create *args = data;
-	struct tegra_bo *bo;
-
-	if (args->flags)
-		return -EINVAL;
-
-	bo = tegra_bo_create_with_handle(file, drm, args->size, args->flags,
-					 &args->handle);
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	return 0;
-}
-
-int tegra_drm_ioctl_gem_mmap(struct drm_device *drm, void *data,
-			     struct drm_file *file)
-{
-	struct drm_tegra_gem_mmap *args = data;
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	gem = drm_gem_object_lookup(file, args->handle);
-	if (!gem)
-		return -EINVAL;
-
-	bo = to_tegra_bo(gem);
-
-	args->offset = drm_vma_node_offset_addr(&bo->gem.vma_node);
-
-	drm_gem_object_put(gem);
-
-	return 0;
-}
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
+#include <linux/version.h>

 #include <soc/tegra/pmc.h>

@@ -332,9 +333,14 @@ static int vic_open_channel(struct tegra_drm_client *client,
 	struct vic *vic = to_vic(client);
 	int err;

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
+	err = pm_runtime_resume_and_get(vic->dev);
+	if (err < 0) {
+#else
 	err = pm_runtime_get_sync(vic->dev);
 	if (err < 0) {
-		pm_runtime_put(vic->dev);
+		pm_runtime_put_noidle(vic->dev);
+#endif
 		return err;
 	}

--- a/drivers/gpu/drm/tegra/vic.h
+++ b/drivers/gpu/drm/tegra/vic.h
@@ -8,7 +8,6 @@

 /* VIC methods */

-#define VIC_SET_APPLICATION_ID			0x00000200
 #define VIC_SET_FCE_UCODE_SIZE			0x0000071C
 #define VIC_SET_FCE_UCODE_OFFSET		0x0000072C