Open source GPL/LGPL release

commit 9fc87a7ec7
Author: svcmobrel-release
Date:   2025-12-19 15:25:44 -08:00

2261 changed files with 576825 additions and 0 deletions


@@ -0,0 +1,24 @@
/*
* Tegra GPU Virtualization Interfaces to Server
*
* Copyright (c) 2016, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __TEGRA_GPU_T19X_H
#define __TEGRA_GPU_T19X_H
#define GPU_LIT_NUM_SUBCTX 99
#endif


@@ -0,0 +1,619 @@
/*
* gk20a event logging to ftrace.
*
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifdef __KERNEL__
#undef TRACE_SYSTEM
#define TRACE_SYSTEM gk20a
#if !defined(__NVGPU_TRACE_GK20A_H__) || defined(TRACE_HEADER_MULTI_READ)
#define __NVGPU_TRACE_GK20A_H__
#include <linux/ktime.h>
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(gk20a,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(__field(const char *, name)),
TP_fast_assign(__entry->name = name;),
TP_printk("name=%s", __entry->name)
);
DEFINE_EVENT(gk20a, gk20a_channel_open,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_channel_release,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_pm_unrailgate,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_finalize_poweron,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_finalize_poweron_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_l2_invalidate_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_l2_flush,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_l2_flush_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_tlb_invalidate_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_fb_flush,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gk20a_mm_fb_flush_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, mc_gk20a_intr_thread_stall_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, mc_gk20a_intr_stall,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, mc_gk20a_intr_stall_done,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DEFINE_EVENT(gk20a, gr_gk20a_handle_sw_method,
TP_PROTO(const char *name),
TP_ARGS(name)
);
DECLARE_EVENT_CLASS(gk20a_channel,
TP_PROTO(int channel),
TP_ARGS(channel),
TP_STRUCT__entry(__field(int, channel)),
TP_fast_assign(__entry->channel = channel;),
TP_printk("ch id %d", __entry->channel)
);
DEFINE_EVENT(gk20a_channel, nvgpu_channel_update,
TP_PROTO(int channel),
TP_ARGS(channel)
);
DEFINE_EVENT(gk20a_channel, gk20a_free_channel,
TP_PROTO(int channel),
TP_ARGS(channel)
);
DEFINE_EVENT(gk20a_channel, nvgpu_channel_open_new,
TP_PROTO(int channel),
TP_ARGS(channel)
);
DEFINE_EVENT(gk20a_channel, gk20a_release_used_channel,
TP_PROTO(int channel),
TP_ARGS(channel)
);
DECLARE_EVENT_CLASS(gk20a_channel_getput,
TP_PROTO(int channel, const char *caller),
TP_ARGS(channel, caller),
TP_STRUCT__entry(
__field(int, channel)
__field(const char *, caller)
),
TP_fast_assign(
__entry->channel = channel;
__entry->caller = caller;
),
TP_printk("channel %d caller %s", __entry->channel, __entry->caller)
);
DEFINE_EVENT(gk20a_channel_getput, nvgpu_channel_get,
TP_PROTO(int channel, const char *caller),
TP_ARGS(channel, caller)
);
DEFINE_EVENT(gk20a_channel_getput, nvgpu_channel_put,
TP_PROTO(int channel, const char *caller),
TP_ARGS(channel, caller)
);
DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree,
TP_PROTO(int channel, const char *caller),
TP_ARGS(channel, caller)
);
DECLARE_EVENT_CLASS(gk20a_channel_sched_params,
TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
u32 timeout, const char *interleave,
const char *graphics_preempt_mode,
const char *compute_preempt_mode),
TP_ARGS(chid, tsgid, pid, timeslice, timeout,
interleave, graphics_preempt_mode, compute_preempt_mode),
TP_STRUCT__entry(
__field(int, chid)
__field(int, tsgid)
__field(pid_t, pid)
__field(u32, timeslice)
__field(u32, timeout)
__field(const char *, interleave) /* no need to copy */
__field(const char *, graphics_preempt_mode) /* no need to copy */
__field(const char *, compute_preempt_mode) /* no need to copy */
),
TP_fast_assign(
__entry->chid = chid;
__entry->tsgid = tsgid;
__entry->pid = pid;
__entry->timeslice = timeslice;
__entry->timeout = timeout;
__entry->interleave = interleave;
__entry->graphics_preempt_mode = graphics_preempt_mode;
__entry->compute_preempt_mode = compute_preempt_mode;
),
TP_printk("chid=%d tsgid=%d pid=%d timeslice=%u timeout=%u interleave=%s graphics_preempt=%s compute_preempt=%s",
__entry->chid, __entry->tsgid, __entry->pid,
__entry->timeslice, __entry->timeout,
__entry->interleave, __entry->graphics_preempt_mode,
__entry->compute_preempt_mode)
);
DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_sched_defaults,
TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
u32 timeout, const char *interleave,
const char *graphics_preempt_mode,
const char *compute_preempt_mode),
TP_ARGS(chid, tsgid, pid, timeslice, timeout,
interleave, graphics_preempt_mode, compute_preempt_mode)
);
DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_priority,
TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
u32 timeout, const char *interleave,
const char *graphics_preempt_mode,
const char *compute_preempt_mode),
TP_ARGS(chid, tsgid, pid, timeslice, timeout,
interleave, graphics_preempt_mode, compute_preempt_mode)
);
DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_runlist_interleave,
TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
u32 timeout, const char *interleave,
const char *graphics_preempt_mode,
const char *compute_preempt_mode),
TP_ARGS(chid, tsgid, pid, timeslice, timeout,
interleave, graphics_preempt_mode, compute_preempt_mode)
);
DEFINE_EVENT(gk20a_channel_sched_params, gk20a_channel_set_timeout,
TP_PROTO(int chid, int tsgid, pid_t pid, u32 timeslice,
u32 timeout, const char *interleave,
const char *graphics_preempt_mode,
const char *compute_preempt_mode),
TP_ARGS(chid, tsgid, pid, timeslice, timeout,
interleave, graphics_preempt_mode, compute_preempt_mode)
);
TRACE_EVENT(gk20a_push_cmdbuf,
TP_PROTO(const char *name, u32 mem_id,
u32 words, u32 offset, void *cmdbuf),
TP_ARGS(name, mem_id, words, offset, cmdbuf),
TP_STRUCT__entry(
__field(const char *, name)
__field(u32, mem_id)
__field(u32, words)
__field(u32, offset)
__field(bool, cmdbuf)
__dynamic_array(u32, cmdbuf, words)
),
TP_fast_assign(
if (cmdbuf) {
(void) memcpy(__get_dynamic_array(cmdbuf),
cmdbuf+offset, words * sizeof(u32));
}
__entry->cmdbuf = cmdbuf;
__entry->name = name;
__entry->mem_id = mem_id;
__entry->words = words;
__entry->offset = offset;
),
TP_printk("name=%s, mem_id=%08x, words=%u, offset=%d, contents=[%s]",
__entry->name, __entry->mem_id,
__entry->words, __entry->offset,
__print_hex(__get_dynamic_array(cmdbuf),
__entry->cmdbuf ? __entry->words * 4 : 0))
);
TRACE_EVENT(gk20a_channel_submit_gpfifo,
TP_PROTO(const char *name, u32 chid, u32 num_entries,
u32 flags, u32 wait_id, u32 wait_value),
TP_ARGS(name, chid, num_entries, flags, wait_id, wait_value),
TP_STRUCT__entry(
__field(const char *, name)
__field(u32, chid)
__field(u32, num_entries)
__field(u32, flags)
__field(u32, wait_id)
__field(u32, wait_value)
),
TP_fast_assign(
__entry->name = name;
__entry->chid = chid;
__entry->num_entries = num_entries;
__entry->flags = flags;
__entry->wait_id = wait_id;
__entry->wait_value = wait_value;
),
TP_printk("name=%s, chid=%d, num_entries=%u, flags=%u, wait_id=%d,"
" wait_value=%u",
__entry->name, __entry->chid, __entry->num_entries,
__entry->flags, __entry->wait_id, __entry->wait_value)
);
TRACE_EVENT(gk20a_channel_submitted_gpfifo,
TP_PROTO(const char *name, u32 chid, u32 num_entries,
u32 flags, u32 incr_id, u32 incr_value),
TP_ARGS(name, chid, num_entries, flags,
incr_id, incr_value),
TP_STRUCT__entry(
__field(const char *, name)
__field(u32, chid)
__field(u32, num_entries)
__field(u32, flags)
__field(u32, incr_id)
__field(u32, incr_value)
),
TP_fast_assign(
__entry->name = name;
__entry->chid = chid;
__entry->num_entries = num_entries;
__entry->flags = flags;
__entry->incr_id = incr_id;
__entry->incr_value = incr_value;
),
TP_printk("name=%s, chid=%d, num_entries=%u, flags=%u,"
" incr_id=%u, incr_value=%u",
__entry->name, __entry->chid, __entry->num_entries,
__entry->flags, __entry->incr_id, __entry->incr_value)
);
TRACE_EVENT(gk20a_reschedule_preempt_next,
TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
u32 preempt),
TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
TP_STRUCT__entry(
__field(u32, chid)
__field(u32, fecs0)
__field(u32, engstat)
__field(u32, fecs1)
__field(u32, fecs2)
__field(u32, preempt)
),
TP_fast_assign(
__entry->chid = chid;
__entry->fecs0 = fecs0;
__entry->engstat = engstat;
__entry->fecs1 = fecs1;
__entry->fecs2 = fecs2;
__entry->preempt = preempt;
),
TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
" preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
__entry->fecs1, __entry->fecs2, __entry->preempt)
);
TRACE_EVENT(gk20a_reschedule_preempted_next,
TP_PROTO(u32 chid),
TP_ARGS(chid),
TP_STRUCT__entry(
__field(u32, chid)
),
TP_fast_assign(
__entry->chid = chid;
),
TP_printk("chid=%d", __entry->chid)
);
TRACE_EVENT(gk20a_channel_reset,
TP_PROTO(u32 chid, u32 tsgid),
TP_ARGS(chid, tsgid),
TP_STRUCT__entry(
__field(u32, chid)
__field(u32, tsgid)
),
TP_fast_assign(
__entry->chid = chid;
__entry->tsgid = tsgid;
),
TP_printk("chid=%d, tsgid=%d",
__entry->chid, __entry->tsgid)
);
TRACE_EVENT(gk20a_as_dev_open,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_dev_release,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_bind_channel,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_alloc_space,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_free_space,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_map_buffer,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_unmap_buffer,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_as_ioctl_get_va_regions,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
TRACE_EVENT(gk20a_mmu_fault,
TP_PROTO(u64 fault_addr,
u32 fault_type,
u32 access_type,
u64 inst_ptr,
u32 engine_id,
const char *client_type_desc,
const char *client_id_desc,
const char *fault_type_desc),
TP_ARGS(fault_addr, fault_type, access_type,
inst_ptr, engine_id, client_type_desc,
client_id_desc, fault_type_desc),
TP_STRUCT__entry(
__field(u64, fault_addr)
__field(u32, fault_type)
__field(u32, access_type)
__field(u64, inst_ptr)
__field(u32, engine_id)
__field(const char *, client_type_desc)
__field(const char *, client_id_desc)
__field(const char *, fault_type_desc)
),
TP_fast_assign(
__entry->fault_addr = fault_addr;
__entry->fault_type = fault_type;
__entry->access_type = access_type;
__entry->inst_ptr = inst_ptr;
__entry->engine_id = engine_id;
__entry->client_type_desc = client_type_desc;
__entry->client_id_desc = client_id_desc;
__entry->fault_type_desc = fault_type_desc;
),
TP_printk("fault addr=0x%llx type=0x%x access_type=0x%x "
"instance=0x%llx engine_id=%d client_type=%s "
"client_id=%s fault type=%s",
__entry->fault_addr, __entry->fault_type,
__entry->access_type, __entry->inst_ptr,
__entry->engine_id, __entry->client_type_desc,
__entry->client_id_desc, __entry->fault_type_desc)
);
TRACE_EVENT(gk20a_ltc_cbc_ctrl_start,
TP_PROTO(const char *name, u32 cbc_ctrl, u32 min_value,
u32 max_value),
TP_ARGS(name, cbc_ctrl, min_value, max_value),
TP_STRUCT__entry(
__field(const char *, name)
__field(u32, cbc_ctrl)
__field(u32, min_value)
__field(u32, max_value)
),
TP_fast_assign(
__entry->name = name;
__entry->cbc_ctrl = cbc_ctrl;
__entry->min_value = min_value;
__entry->max_value = max_value;
),
TP_printk("name=%s, cbc_ctrl=%d, min_value=%u, max_value=%u",
__entry->name, __entry->cbc_ctrl, __entry->min_value,
__entry->max_value)
);
TRACE_EVENT(gk20a_ltc_cbc_ctrl_done,
TP_PROTO(const char *name),
TP_ARGS(name),
TP_STRUCT__entry(
__field(const char *, name)
),
TP_fast_assign(
__entry->name = name;
),
TP_printk("name=%s ", __entry->name)
);
DECLARE_EVENT_CLASS(gk20a_cde,
TP_PROTO(const void *ctx),
TP_ARGS(ctx),
TP_STRUCT__entry(__field(const void *, ctx)),
TP_fast_assign(__entry->ctx = ctx;),
TP_printk("ctx=%p", __entry->ctx)
);
DEFINE_EVENT(gk20a_cde, gk20a_cde_remove_ctx,
TP_PROTO(const void *ctx),
TP_ARGS(ctx)
);
DEFINE_EVENT(gk20a_cde, gk20a_cde_release,
TP_PROTO(const void *ctx),
TP_ARGS(ctx)
);
DEFINE_EVENT(gk20a_cde, gk20a_cde_get_context,
TP_PROTO(const void *ctx),
TP_ARGS(ctx)
);
DEFINE_EVENT(gk20a_cde, gk20a_cde_allocate_context,
TP_PROTO(const void *ctx),
TP_ARGS(ctx)
);
DEFINE_EVENT(gk20a_cde, gk20a_cde_finished_ctx_cb,
TP_PROTO(const void *ctx),
TP_ARGS(ctx)
);
#endif /* __NVGPU_TRACE_GK20A_H__ */
/* This part must be outside protection */
#include <trace/define_trace.h>
#endif
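
As an editorial aside, here is a sketch of how a driver source file would emit these events. The tracepoint machinery generates one trace_<event>() helper per DEFINE_EVENT, and CREATE_TRACE_POINTS must be defined in exactly one compilation unit before this header is included; the <trace/events/gk20a.h> install path is an assumption. At runtime the whole group can be enabled with "echo 1 > /sys/kernel/tracing/events/gk20a/enable", since TRACE_SYSTEM above names the group "gk20a".

/* In exactly one .c file of the driver (assumed install path): */
#define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h>

static void example_emit(int chid)
{
	trace_gk20a_channel_open("gk20a");	/* gk20a event class */
	trace_nvgpu_channel_open_new(chid);	/* gk20a_channel event class */
}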


@@ -0,0 +1,458 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* /dev/nvhost-as-gpu device
*
* Opening a '/dev/nvhost-as-gpu' device node creates a new address
* space. nvgpu channels (for the same module) can then be bound to such an
* address space to define the addresses it has access to.
*
* Once an nvgpu channel has been bound to an address space it cannot be
* unbound. There is no support for allowing an nvgpu channel to change from
* one address space to another (or from one to none).
*
* An address space remains valid as long as there is an open device file
* for it or any nvgpu channels bound to it. Once all references to the
* address space are removed, it is deleted.
*
*/
#ifndef _UAPI__LINUX_NVGPU_AS_H__
#define _UAPI__LINUX_NVGPU_AS_H__
#include "nvgpu-uapi-common.h"
#define NVGPU_AS_IOCTL_MAGIC 'A'
/*
* Allocating an address space range:
*
* Address ranges created with this ioctl are reserved for later use with
* fixed-address buffer mappings.
*
* If _FLAGS_FIXED_OFFSET is specified then the new range starts at the 'offset'
* given. Otherwise the address returned is chosen to be a multiple of 'align.'
*
*/
struct nvgpu32_as_alloc_space_args {
__u32 pages; /* in, pages */
__u32 page_size; /* in, bytes */
__u32 flags; /* in */
#define NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET 0x1
#define NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE 0x2
union {
__u64 offset; /* inout, byte address valid iff _FIXED_OFFSET */
__u64 align; /* in, alignment multiple (0:={1 or n/a}) */
} o_a;
};
struct nvgpu_as_alloc_space_args {
__u64 pages; /* in, pages */
__u32 page_size; /* in, bytes */
__u32 flags; /* in */
union {
__u64 offset; /* inout, byte address valid iff _FIXED_OFFSET */
__u64 align; /* in, alignment multiple (0:={1 or n/a}) */
} o_a;
__u32 padding[2]; /* in */
};
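/*
 * Editorial sketch (not part of the original header): reserving a VA
 * range from userspace. The device node path comes from the comment at
 * the top of this file; the <linux/nvgpu-as.h> install path and the
 * 64 KiB page size are illustrative assumptions, error handling is
 * elided, and the claim that the chosen address comes back in
 * o_a.offset follows the "address returned" wording above.
 */
#if 0	/* example only */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu-as.h>

static __u64 example_alloc_va(void)
{
	int as_fd = open("/dev/nvhost-as-gpu", O_RDWR);
	struct nvgpu_as_alloc_space_args args = {
		.pages = 256,
		.page_size = 65536,	/* 256 x 64 KiB = 16 MiB reservation */
		.flags = 0,		/* no _FIXED_OFFSET: kernel picks the address */
		.o_a.align = 0,		/* 0 := default alignment */
	};

	if (as_fd < 0 || ioctl(as_fd, NVGPU_AS_IOCTL_ALLOC_SPACE, &args) < 0)
		return 0;
	return args.o_a.offset;	/* chosen GPU VA (assumed) */
}
#endif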
/*
* Releasing an address space range:
*
* The previously allocated region starting at 'offset' is freed. If there are
* any buffers currently mapped inside the region the ioctl will fail.
*/
struct nvgpu_as_free_space_args {
__u64 offset; /* in, byte address */
__u64 pages; /* in, pages */
__u32 page_size; /* in, bytes */
__u32 padding[3];
};
/*
* Binding an nvgpu channel to an address space:
*
* A channel must be bound to an address space before allocating a gpfifo
* in nvgpu. The 'channel_fd' given here is the fd used to allocate the
* channel. Once a channel has been bound to an address space it cannot
* be unbound (except when the channel is destroyed).
*/
struct nvgpu_as_bind_channel_args {
__u32 channel_fd; /* in */
};
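/*
 * Editorial sketch: binding a channel to this address space before any
 * gpfifo allocation, per the comment above. channel_fd is the fd that
 * was used to allocate the channel; as_fd is an open address-space fd.
 * Includes as in the earlier sketch.
 */
#if 0	/* example only */
static int example_bind(int as_fd, int channel_fd)
{
	struct nvgpu_as_bind_channel_args args = {
		.channel_fd = channel_fd,
	};

	return ioctl(as_fd, NVGPU_AS_IOCTL_BIND_CHANNEL, &args);
}
#endif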
/*
* Mapping nvmap buffers into an address space:
*
* The start address is the 'offset' given if _FIXED_OFFSET is specified.
* Otherwise the address returned is a multiple of 'align.'
*
* If 'page_size' is set to 0 the nvmap buffer's allocation alignment/sizing
* will be used to determine the page size (largest possible). The page size
* chosen will be returned back to the caller in the 'page_size' parameter in
* that case.
*/
#define NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET (1 << 0)
#define NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE (1 << 2)
#define NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT (1 << 4)
#define NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE (1 << 5)
#define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6)
#define NVGPU_AS_MAP_BUFFER_FLAGS_L3_ALLOC (1 << 7)
#define NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL (1 << 8)
#define NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC (1 << 9)
#define NVGPU_AS_MAP_BUFFER_FLAGS_ACCESS_NO_WRITE (1 << 10)
/*
* VM map buffer IOCTL
*
* This ioctl maps a buffer - generally a dma_buf FD - into the VM's address
* space. Usage of this API is as follows.
*
* @flags [IN]
*
* These are the flags passed to the IOCTL to modify the IOCTL behavior. The
* following flags are supported:
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET
*
* Specify that the mapping already has an address. The mapping address
* must reside in an area already reserved with the as_alloc_space IOCTL.
* If this flag is set then the @offset field must be populated with the
* address to map to.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE
*
* Specify that a mapping shall be GPU cacheable.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_IO_COHERENT
*
* Specify that a mapping shall be IO coherent.
*
* DEPRECATED: do not use! This will be removed in a future update.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_UNMAPPED_PTE
*
* Specify that a mapping shall be marked as invalid but otherwise
* populated. This flag doesn't actually make a lot of sense. The
* only reason to specify it is for testing replayable faults but
* an actual useful implementation of such a feature would likely
* not use this.
*
* DEPRECATED: do not use! This will be removed in a future update.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS
*
* Deprecated.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL
*
* Set when userspace plans to pass in @compr_kind and @incompr_kind
* instead of letting the kernel work out kind fields.
*
* %NVGPU_AS_MAP_BUFFER_FLAGS_PLATFORM_ATOMIC
*
* Specify that a mapping should use platform atomics.
*
* @kind [IN]
*
* Specify the kind to use for the mapping.
*
* @compr_kind [IN]
* @incompr_kind [IN]
*
* Specify the compressible and incompressible kinds to be used for the
* mapping. Requires that %NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL is
* set in @flags. The kernel will attempt to use @compr_kind and, if that
* is not possible for some reason, will then fall back to using
* @incompr_kind.
*
* @dmabuf_fd [IN]
*
* FD pointing to the dmabuf that will be mapped into the GMMU.
*
* @page_size [IN]
*
* Specify the page size for the mapping. Must be set to a valid, supported
* page size. If left unset this IOCTL will return -EINVAL. In general, a
* small page size mapping will always be supported, but in certain cases of
* compression this will not be the case.
*
* @buffer_offset [IN]
*
* Specify an offset into the physical buffer to begin the mapping at. For
* example, imagine a DMA buffer 32KB long of which you wish to map only
* the part starting at 8KB. In such a case you would pass 8KB as the
* @buffer_offset. This is only available with fixed address mappings. All
* regular (non-fixed) mappings require this field to be set to 0. This field
* is in bytes.
*
* @mapping_size [IN]
*
* The size of the mapping in bytes. This is from the @buffer_offset position.
* So for example, assuming you have a 32KB physical buffer and you want to
* map only 8KB of it, starting at some offset, then you would specify 8192 in
* this field. Of course this size + the buffer_offset must be less than the
* length of the physical buffer; otherwise -EINVAL is returned. This is only
* supported for fixed mappings.
*
* @offset [IN, OUT]
*
* The offset of the buffer in the GPU virtual address space. In other words
* the virtual address of the buffer. If the
* %NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET flag is set then this field must be
* populated by userspace. In all cases the ultimate mapped address is
* returned in this field. The field is in bytes.
*/
struct nvgpu_as_map_buffer_ex_args {
/* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
__u32 flags; /* in/out */
/*
* - If both compr_kind and incompr_kind are set
* (i.e., value is other than NV_KIND_INVALID),
* kernel attempts to use compr_kind first.
*
* - If compr_kind is set, kernel attempts to allocate
* comptags for the buffer. If successful,
* compr_kind is used as the PTE kind.
*
* - If incompr_kind is set, kernel uses incompr_kind as the
* PTE kind, if compr_kind cannot be used. Comptags are not
* allocated.
*
* - If neither compr_kind nor incompr_kind is set, the
* map call will fail.
*/
#define NV_KIND_INVALID -1
__s16 compr_kind;
__s16 incompr_kind;
__u32 dmabuf_fd; /* in */
__u32 page_size; /* inout, 0:= best fit to buffer */
__u64 buffer_offset; /* in, offset of mapped buffer region */
__u64 mapping_size; /* in, size of mapped buffer region */
__u64 offset; /* in/out, we use this address if flag
* FIXED_OFFSET is set. This will fail
* if space is not properly allocated. The
* actual virtual address to which we mapped
* the buffer is returned in this field. */
};
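/*
 * Editorial sketch: mapping a dma-buf with explicit kind control, per
 * the IOCTL documentation above. The dma-buf fd is assumed to come from
 * an allocator such as nvmap; the kind value 0 and the 4 KiB page size
 * are illustrative only (the doc above notes a small page size mapping
 * is generally always supported). Includes as in the earlier sketch.
 */
#if 0	/* example only */
static int example_map(int as_fd, int dmabuf_fd, __u64 *gva)
{
	struct nvgpu_as_map_buffer_ex_args args = {
		.flags = NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL |
			 NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE,
		.compr_kind = NV_KIND_INVALID,	/* no compression requested */
		.incompr_kind = 0,		/* illustrative kind value */
		.dmabuf_fd = (__u32)dmabuf_fd,
		.page_size = 4096,
		.buffer_offset = 0,		/* must be 0 for non-fixed maps */
		.mapping_size = 0,		/* 0 := whole buffer (assumed) */
	};
	int err = ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args);

	if (!err)
		*gva = args.offset;	/* mapped GPU VA comes back here */
	return err;
}
#endif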
/*
* Get info about buffer compbits. Requires that buffer is mapped with
* NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS.
*
* The compbits for a mappable buffer are organized in a mappable
* window to the compbits store. In case the window contains comptags
* for more than one buffer, the buffer comptag line index may differ
* from the window comptag line index.
*/
struct nvgpu_as_get_buffer_compbits_info_args {
/* in: address of an existing buffer mapping */
__u64 mapping_gva;
/* out: size of compbits mapping window (bytes) */
__u64 compbits_win_size;
/* out: comptag line index of the window start */
__u32 compbits_win_ctagline;
/* out: comptag line index of the buffer mapping */
__u32 mapping_ctagline;
/* Buffer uses compbits */
#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS (1 << 0)
/* Buffer compbits are mappable */
#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE (1 << 1)
/* Buffer IOVA addresses are discontiguous */
#define NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_DISCONTIG_IOVA (1 << 2)
/* out */
__u32 flags;
__u32 reserved1;
};
/*
* Map compbits of a mapped buffer to the GPU address space. The
* compbits mapping is automatically unmapped when the buffer is
* unmapped.
*
* The compbits mapping always uses small pages, it is read-only, and
* is GPU cacheable. The mapping is a window to the compbits
* store. The window may not be exactly the size of the cache lines
* for the buffer mapping.
*/
struct nvgpu_as_map_buffer_compbits_args {
/* in: address of an existing buffer mapping */
__u64 mapping_gva;
/* in: gva to the mapped compbits store window when
* FIXED_OFFSET is set. Otherwise, ignored and should be 0.
*
* For FIXED_OFFSET mapping:
* - If compbits are already mapped compbits_win_gva
* must match with the previously mapped gva.
* - The user must have allocated enough GVA space for the
* mapping window (see compbits_win_size in
* nvgpu_as_get_buffer_compbits_info_args)
*
* out: gva to the mapped compbits store window */
__u64 compbits_win_gva;
/* in: reserved, must be 0
out: physical or IOMMU address for mapping */
union {
/* contiguous iova addresses */
__u64 mapping_iova;
/* buffer to receive discontiguous iova addresses (reserved) */
__u64 mapping_iova_buf_addr;
};
/* in: Buffer size (in bytes) for discontiguous iova
* addresses. Reserved, must be 0. */
__u64 mapping_iova_buf_size;
#define NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET (1 << 0)
__u32 flags;
__u32 reserved1;
};
/*
* Unmapping a buffer:
*
* To unmap a previously mapped buffer set 'offset' to the offset returned in
* the mapping call. This includes where a buffer has been mapped into a fixed
* offset of a previously allocated address space range.
*/
struct nvgpu_as_unmap_buffer_args {
__u64 offset; /* in, byte address */
};
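/*
 * Editorial sketch: tearing a mapping down again with the GPU VA that
 * the map call returned in 'offset'.
 */
#if 0	/* example only */
static int example_unmap(int as_fd, __u64 gva)
{
	struct nvgpu_as_unmap_buffer_args args = { .offset = gva };

	return ioctl(as_fd, NVGPU_AS_IOCTL_UNMAP_BUFFER, &args);
}
#endif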
struct nvgpu_as_va_region {
__u64 offset;
__u32 page_size;
__u32 reserved;
__u64 pages;
};
struct nvgpu_as_get_va_regions_args {
__u64 buf_addr; /* Pointer to array of struct nvgpu_as_va_region.
* Ignored if buf_size is 0 */
* Ignored if buf_size is 0 */
__u32 buf_size; /* in: userspace buf size (in bytes)
out: kernel buf size (in bytes) */
__u32 reserved;
};
struct nvgpu_as_map_buffer_batch_args {
__u64 unmaps; /* ptr to array of nvgpu_as_unmap_buffer_args */
__u64 maps; /* ptr to array of nvgpu_as_map_buffer_ex_args */
__u32 num_unmaps; /* in: number of unmaps
* out: on error, number of successful unmaps */
__u32 num_maps; /* in: number of maps
* out: on error, number of successful maps */
__u64 reserved;
};
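/*
 * Editorial sketch: submitting one unmap and one map in a single batch
 * call. Note the partial-success semantics documented in the struct
 * above: on error, num_unmaps/num_maps are rewritten with the counts
 * that actually succeeded. Includes as in the earlier sketch.
 */
#if 0	/* example only */
static int example_batch(int as_fd,
			 struct nvgpu_as_unmap_buffer_args *unmap,
			 struct nvgpu_as_map_buffer_ex_args *map)
{
	struct nvgpu_as_map_buffer_batch_args args = {
		.unmaps = (__u64)(uintptr_t)unmap,
		.maps = (__u64)(uintptr_t)map,
		.num_unmaps = 1,
		.num_maps = 1,
	};

	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_BATCH, &args);
}
#endif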
struct nvgpu_as_get_sync_ro_map_args {
__u64 base_gpuva;
__u32 sync_size;
__u32 num_syncpoints;
};
/*
* VM mapping modify IOCTL
*
* This ioctl changes the kind of an existing mapped buffer region.
*
* Usage of this API is as follows.
*
* @compr_kind [IN]
*
* Specify the new compressed kind to be used for the mapping. This
* parameter is only valid if compression resources are allocated to the
* underlying physical buffer. If NV_KIND_INVALID is specified then the
* fallback incompr_kind parameter is used.
*
* @incompr_kind [IN]
*
* Specify the new kind to be used for the mapping if compression is not
* to be used. If NV_KIND_INVALID is specified then incompressible fallback
* is not allowed.
*
* @buffer_offset [IN]
*
* Specifies the beginning offset of the region within the existing buffer
* for which the kind should be modified. This field is in bytes.
*
* @buffer_size [IN]
*
* Specifies the size of the region within the existing buffer for which the
* kind should be updated. This field is in bytes. Note that the region
* described by <buffer_offset, buffer_offset + buffer_size> must reside
* entirely within the existing buffer.
*
* @map_address [IN]
*
* The address of the existing buffer in the GPU virtual address space
* specified in bytes.
*/
struct nvgpu_as_mapping_modify_args {
__s16 compr_kind; /* in */
__s16 incompr_kind; /* in */
__u64 buffer_offset; /* in, offset of mapped buffer region */
__u64 buffer_size; /* in, size of mapped buffer region */
__u64 map_address; /* in, base virtual address of mapped buffer */
};
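/*
 * Editorial sketch: re-kinding the first 8 KiB of an existing mapping.
 * NV_KIND_INVALID for compr_kind means no compressed kind is requested,
 * so the incompressible kind is used; the kind value 0 is illustrative
 * (kind numbering is architecture-specific).
 */
#if 0	/* example only */
static int example_modify(int as_fd, __u64 gva)
{
	struct nvgpu_as_mapping_modify_args args = {
		.compr_kind = NV_KIND_INVALID,
		.incompr_kind = 0,
		.buffer_offset = 0,
		.buffer_size = 8192,
		.map_address = gva,
	};

	return ioctl(as_fd, NVGPU_AS_IOCTL_MAPPING_MODIFY, &args);
}
#endif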
#define NVGPU_AS_IOCTL_BIND_CHANNEL \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 1, struct nvgpu_as_bind_channel_args)
#define NVGPU32_AS_IOCTL_ALLOC_SPACE \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 2, struct nvgpu32_as_alloc_space_args)
#define NVGPU_AS_IOCTL_FREE_SPACE \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 3, struct nvgpu_as_free_space_args)
#define NVGPU_AS_IOCTL_UNMAP_BUFFER \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 5, struct nvgpu_as_unmap_buffer_args)
#define NVGPU_AS_IOCTL_ALLOC_SPACE \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 6, struct nvgpu_as_alloc_space_args)
#define NVGPU_AS_IOCTL_MAP_BUFFER_EX \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 7, struct nvgpu_as_map_buffer_ex_args)
#define NVGPU_AS_IOCTL_GET_VA_REGIONS \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 8, struct nvgpu_as_get_va_regions_args)
#define NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 9, struct nvgpu_as_get_buffer_compbits_info_args)
#define NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 10, struct nvgpu_as_map_buffer_compbits_args)
#define NVGPU_AS_IOCTL_MAP_BUFFER_BATCH \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 11, struct nvgpu_as_map_buffer_batch_args)
#define NVGPU_AS_IOCTL_GET_SYNC_RO_MAP \
_IOR(NVGPU_AS_IOCTL_MAGIC, 12, struct nvgpu_as_get_sync_ro_map_args)
#define NVGPU_AS_IOCTL_MAPPING_MODIFY \
_IOWR(NVGPU_AS_IOCTL_MAGIC, 13, struct nvgpu_as_mapping_modify_args)
#define NVGPU_AS_IOCTL_LAST \
_IOC_NR(NVGPU_AS_IOCTL_MAPPING_MODIFY)
#define NVGPU_AS_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_as_map_buffer_ex_args)
#endif

File diff suppressed because it is too large.


@@ -0,0 +1,76 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Event session
*
* NVGPU_GPU_IOCTL_GET_EVENT_FD opens an event session.
* The ioctls below can be used on these session fds.
*/
#ifndef _UAPI__LINUX_NVGPU_EVENT_H__
#define _UAPI__LINUX_NVGPU_EVENT_H__
#include "nvgpu-uapi-common.h"
#define NVGPU_EVENT_IOCTL_MAGIC 'E'
/* Normal events (POLLIN) */
/* Event associated to a VF update */
#define NVGPU_GPU_EVENT_VF_UPDATE 0
/* Recoverable alarms (POLLPRI) */
/* Alarm when target frequency on any session is not possible */
#define NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE 1
/* Alarm when target frequency on current session is not possible */
#define NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE 2
/* Alarm when Clock Arbiter failed */
#define NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED 3
/* Alarm when VF table update failed */
#define NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED 4
/* Alarm on thermal condition */
#define NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD 5
/* Alarm on power condition */
#define NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD 6
/* Non recoverable alarm (POLLHUP) */
/* Alarm on GPU shutdown/fall from bus */
#define NVGPU_GPU_EVENT_ALARM_GPU_LOST 7
#define NVGPU_GPU_EVENT_LAST NVGPU_GPU_EVENT_ALARM_GPU_LOST
struct nvgpu_gpu_event_info {
__u32 event_id; /* NVGPU_GPU_EVENT_* */
__u32 reserved;
__u64 timestamp; /* CPU timestamp (in nanoseconds) */
};
struct nvgpu_gpu_set_event_filter_args {
/* in: Flags (not currently used). */
__u32 flags;
/* in: Size of event filter in 32-bit words */
__u32 size;
/* in: Address of buffer containing bit mask of events.
* Bit #n is set if event #n should be monitored.
*/
__u64 buffer;
};
#define NVGPU_EVENT_IOCTL_SET_FILTER \
_IOW(NVGPU_EVENT_IOCTL_MAGIC, 1, struct nvgpu_gpu_set_event_filter_args)
#define NVGPU_EVENT_IOCTL_LAST \
_IOC_NR(NVGPU_EVENT_IOCTL_SET_FILTER)
#define NVGPU_EVENT_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_gpu_set_event_filter_args)
#endif
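
A minimal userspace sketch of the event-session flow described above, assuming the session fd was obtained with NVGPU_GPU_IOCTL_GET_EVENT_FD (defined in nvgpu.h), that this header installs as <linux/nvgpu-event.h>, and that events are consumed by read()ing nvgpu_gpu_event_info records from the fd. The filter is a bit mask indexed by event id; normal events arrive as POLLIN, recoverable alarms as POLLPRI, and GPU-lost as POLLHUP.

#include <poll.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu-event.h>	/* assumed install path */

static int example_wait_event(int event_fd)
{
	/* Monitor VF updates and the non-recoverable GPU-lost alarm. */
	__u32 filter = (1u << NVGPU_GPU_EVENT_VF_UPDATE) |
		       (1u << NVGPU_GPU_EVENT_ALARM_GPU_LOST);
	struct nvgpu_gpu_set_event_filter_args fargs = {
		.flags = 0,
		.size = 1,			/* one 32-bit word */
		.buffer = (__u64)(uintptr_t)&filter,
	};
	struct pollfd pfd = { .fd = event_fd, .events = POLLIN | POLLPRI };
	struct nvgpu_gpu_event_info info;

	if (ioctl(event_fd, NVGPU_EVENT_IOCTL_SET_FILTER, &fargs) < 0)
		return -1;
	if (poll(&pfd, 1, -1) < 0)
		return -1;
	if (read(event_fd, &info, sizeof(info)) != (ssize_t)sizeof(info))
		return -1;
	return (int)info.event_id;	/* NVGPU_GPU_EVENT_* */
}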


@@ -0,0 +1,30 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI__LINUX_NVGPU_UAPI_COMMON_H
#define _UAPI__LINUX_NVGPU_UAPI_COMMON_H
#include <linux/ioctl.h>
#include <linux/types.h>
#if !defined(__KERNEL__)
#define __user
#define nvgpu_user
/* Some userspace builds have __packed defined already */
#if !defined(__packed)
#define __packed __attribute__((packed))
#endif /* __packed */
#endif /* !__KERNEL__ */
#endif /* _UAPI__LINUX_NVGPU_UAPI_COMMON_H */

include/uapi/linux/nvgpu.h (new file, 1374 lines)

File diff suppressed because it is too large.