mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 09:12:24 +03:00)
Open source GPL/LGPL release
drivers/gpu/Makefile (new file, 2 lines)
@@ -0,0 +1,2 @@
# Choose this option if you have an SoC with integrated Nvidia GPU IP.
obj-m += nvgpu/
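In kbuild, listing a directory in obj-m, as obj-m += nvgpu/ does above, makes the build descend into that directory and build its objects as modules. A hedged sketch of a typical out-of-tree invocation against installed kernel headers follows; the kernel build directory and module path are illustrative, not taken from this commit:

	# Build the nvgpu module tree against the running kernel's build directory (illustrative).
	make -C /lib/modules/$(uname -r)/build M=$PWD/drivers/gpu modules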
drivers/gpu/nvgpu/Makefile (new file, 788 lines)
@@ -0,0 +1,788 @@
GCOV_PROFILE := y

# When building NVGPU as an external module, srctree.nvgpu is not
# defined. Given that NVGPU knows where its header files are located
# relatively, we can use the path information of this Makefile to
# set srctree.nvgpu if it is not already defined. Please note that
# 'lastword $(MAKEFILE_LIST)' refers to this Makefile.
srctree.nvgpu ?= $(abspath $(shell dirname $(lastword $(MAKEFILE_LIST)))/../../..)
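# Editor's note (hedged): a minimal sketch of how the expansion above behaves,
# assuming a purely hypothetical external checkout at /home/user/nvgpu (the
# path is illustrative, not from this repository):
#   this Makefile                -> /home/user/nvgpu/drivers/gpu/nvgpu/Makefile
#   $(lastword $(MAKEFILE_LIST)) expands to that path, dirname strips the file
#   name, and the trailing ../../.. walks back up to the tree root, so
#   srctree.nvgpu                -> /home/user/nvgpu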

ccflags-y += -I$(srctree.nvgpu)/drivers/gpu/nvgpu/include
ccflags-y += -I$(srctree.nvgpu)/drivers/gpu/nvgpu
ccflags-y += -I$(srctree.nvgpu)/include
ccflags-y += -I$(srctree.nvgpu)/include/uapi

ccflags-y += -I$(srctree)/drivers/devfreq

ccflags-y += -Wframe-larger-than=2048
ccflags-y += -Wno-multichar
ccflags-y += -Werror
ccflags-y += -Wno-error=cpp
ifeq ($(VERSION),4)
ccflags-y += -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
endif

# Turn off when this is fixed upstream, if ever.
ccflags-y += -D__NVGPU_PREVENT_UNTRUSTED_SPECULATION

ifneq ($(srctree.nvgpu),)
include $(srctree.nvgpu)/drivers/gpu/nvgpu/Makefile.linux.configs
else
include Makefile.linux.configs
endif

# When using the upstream host1x driver, the Makefile must define the
|
||||
# srctree.host1x path in order to find the necessary header files for
|
||||
# the upstream host1x driver.
|
||||
ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y)
|
||||
ccflags-y += -I$(srctree.host1x)/include
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DEBUGGER),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_DEBUGGER
|
||||
ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET
|
||||
endif
|
||||
|
||||
ccflags-y += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
||||
ccflags-y += -DCONFIG_NVGPU_TPC_POWERGATE
|
||||
ccflags-y += -DCONFIG_NVGPU_ACR_LEGACY
|
||||
ccflags-y += -DCONFIG_NVGPU_ENGINE_QUEUE
|
||||
ccflags-y += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
|
||||
ccflags-y += -DCONFIG_NVGPU_USERD
|
||||
ccflags-y += -DCONFIG_NVGPU_CHANNEL_WDT
|
||||
ccflags-y += -DCONFIG_NVGPU_LS_PMU
|
||||
ccflags-y += -DCONFIG_NVGPU_CILP
|
||||
ccflags-y += -DCONFIG_NVGPU_GFXP
|
||||
ccflags-y += -DCONFIG_NVGPU_GRAPHICS
|
||||
ccflags-y += -DCONFIG_NVGPU_REPLAYABLE_FAULT
|
||||
ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
||||
ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
|
||||
ccflags-y += -DCONFIG_NVGPU_POWER_PG
|
||||
ccflags-y += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
ccflags-y += -DCONFIG_NVGPU_SIM
|
||||
ccflags-y += -DCONFIG_NVGPU_TRACE
|
||||
ccflags-y += -DCONFIG_NVGPU_SYSFS
|
||||
ccflags-y += -DCONFIG_NVGPU_CLK_ARB
|
||||
ccflags-y += -DCONFIG_NVGPU_FALCON_DEBUG
|
||||
ccflags-y += -DCONFIG_NVGPU_FALCON_NON_FUSA
|
||||
ccflags-y += -DCONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
ccflags-y += -DCONFIG_NVGPU_NON_FUSA
|
||||
ccflags-y += -DCONFIG_NVGPU_INJECT_HWERR
|
||||
ccflags-y += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
|
||||
ccflags-y += -DCONFIG_NVGPU_SET_FALCON_ACCESS_MAP
|
||||
ccflags-y += -DCONFIG_NVGPU_SW_SEMAPHORE
|
||||
ccflags-y += -DCONFIG_NVGPU_FENCE
|
||||
ccflags-y += -DCONFIG_NVGPU_PROFILER
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_LOGGING),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_LOGGING=1
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_RECOVERY),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_RECOVERY
|
||||
ccflags-y += -DCONFIG_NVGPU_ENGINE_RESET
|
||||
nvgpu-y += \
|
||||
hal/rc/rc_gk20a.o \
|
||||
hal/rc/rc_gv11b.o
|
||||
endif
|
||||
|
||||
obj-$(CONFIG_GK20A) := nvgpu.o
|
||||
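# Editor's note (hedged): obj-$(CONFIG_GK20A) := nvgpu.o together with the
# nvgpu-y / nvgpu-$(CONFIG_*) lists below is the standard kbuild pattern for a
# composite module: every listed object is linked into a single nvgpu.ko.
# Generic sketch with illustrative names, not part of this file:
#   obj-m := foo.o
#   foo-y += core.o util.o
#   foo-$(CONFIG_FOO_DEBUG) += debug.o   # included only when CONFIG_FOO_DEBUG=y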
|
||||
# OS independent parts of nvgpu. The work to collect files here
|
||||
# is in progress.
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),y)
|
||||
nvgpu-$(CONFIG_NVGPU_DGPU) += \
|
||||
os/linux/pci.o \
|
||||
os/linux/pci_power.o \
|
||||
os/linux/dmabuf_vidmem.o \
|
||||
os/linux/os_ops_gv100.o \
|
||||
os/linux/os_ops_tu104.o \
|
||||
common/sec2/sec2.o \
|
||||
common/sec2/sec2_allocator.o \
|
||||
common/sec2/sec2_lsfm.o \
|
||||
common/sec2/ipc/sec2_cmd.o \
|
||||
common/sec2/ipc/sec2_msg.o \
|
||||
common/sec2/ipc/sec2_queue.o \
|
||||
common/sec2/ipc/sec2_seq.o \
|
||||
common/vbios/bios_sw_gv100.o \
|
||||
common/vbios/bios_sw_tu104.o \
|
||||
common/falcon/falcon_sw_tu104.o \
|
||||
common/acr/acr_sw_tu104.o \
|
||||
common/mm/allocators/page_allocator.o \
|
||||
common/mm/vidmem.o \
|
||||
common/pramin.o \
|
||||
common/ce/ce_app.o \
|
||||
common/clk_arb/clk_arb_gv100.o \
|
||||
common/engine_queues/engine_emem_queue.o \
|
||||
hal/mm/mm_gv100.o \
|
||||
hal/mm/mm_tu104.o \
|
||||
hal/mc/mc_gv100.o \
|
||||
hal/mc/mc_tu104.o \
|
||||
hal/bus/bus_gv100.o \
|
||||
hal/bus/bus_tu104.o \
|
||||
hal/class/class_tu104.o \
|
||||
hal/clk/clk_tu104.o \
|
||||
hal/clk/clk_mon_tu104.o \
|
||||
hal/gr/init/gr_init_gv100.o \
|
||||
hal/gr/init/gr_init_tu104.o \
|
||||
hal/gr/intr/gr_intr_tu104.o \
|
||||
hal/gr/falcon/gr_falcon_tu104.o \
|
||||
hal/fbpa/fbpa_tu104.o \
|
||||
hal/init/hal_tu104.o \
|
||||
hal/init/hal_tu104_litter.o \
|
||||
hal/power_features/cg/tu104_gating_reglist.o \
|
||||
hal/ltc/ltc_tu104.o \
|
||||
hal/fb/fb_gv100.o \
|
||||
hal/fb/fb_tu104.o \
|
||||
hal/fb/fb_mmu_fault_tu104.o \
|
||||
hal/fb/intr/fb_intr_gv100.o \
|
||||
hal/fb/intr/fb_intr_tu104.o \
|
||||
hal/func/func_tu104.o \
|
||||
hal/fifo/fifo_tu104.o \
|
||||
hal/fifo/usermode_tu104.o \
|
||||
hal/fifo/pbdma_tu104.o \
|
||||
hal/fifo/ramfc_tu104.o \
|
||||
hal/fifo/ramin_tu104.o \
|
||||
hal/fifo/channel_gv100.o \
|
||||
hal/fifo/runlist_ram_tu104.o \
|
||||
hal/fifo/runlist_fifo_gv100.o \
|
||||
hal/fifo/runlist_fifo_tu104.o \
|
||||
hal/fifo/fifo_intr_gv100.o \
|
||||
hal/fuse/fuse_gp106.o \
|
||||
hal/fuse/fuse_tu104.o \
|
||||
hal/netlist/netlist_gv100.o \
|
||||
hal/netlist/netlist_tu104.o \
|
||||
hal/nvdec/nvdec_gp106.o \
|
||||
hal/nvdec/nvdec_tu104.o \
|
||||
hal/gsp/gsp_tu104.o \
|
||||
hal/sec2/sec2_tu104.o \
|
||||
hal/pramin/pramin_gp10b.o \
|
||||
hal/pramin/pramin_gv100.o \
|
||||
hal/pramin/pramin_init.o \
|
||||
hal/pramin/pramin_tu104.o \
|
||||
hal/bios/bios_tu104.o \
|
||||
hal/top/top_gv100.o \
|
||||
hal/xve/xve_gp106.o \
|
||||
hal/xve/xve_tu104.o
|
||||
|
||||
nvgpu-$(CONFIG_DEBUG_FS) += \
|
||||
os/linux/debug_therm_tu104.o \
|
||||
os/linux/debug_bios.o \
|
||||
os/linux/debug_xve.o \
|
||||
os/linux/debug_clk_tu104.o
|
||||
endif
|
||||
|
||||
# nvlink sources are not conditionally compiled. nvlink probe and
|
||||
# public functions return -ENODEV when not supported.
|
||||
|
||||
nvgpu-y += \
|
||||
common/vbios/nvlink_bios.o \
|
||||
common/nvlink/probe.o \
|
||||
common/nvlink/init/device_reginit.o \
|
||||
common/nvlink/init/device_reginit_gv100.o \
|
||||
common/nvlink/minion.o \
|
||||
common/nvlink/link_mode_transitions.o \
|
||||
common/nvlink/nvlink.o \
|
||||
os/linux/nvlink_probe.o \
|
||||
os/linux/nvlink.o \
|
||||
hal/nvlink/minion_gv100.o \
|
||||
hal/nvlink/minion_tu104.o \
|
||||
hal/nvlink/nvlink_gv100.o \
|
||||
hal/nvlink/nvlink_tu104.o \
|
||||
hal/nvlink/intr_and_err_handling_tu104.o \
|
||||
hal/nvlink/link_mode_transitions_gv100.o \
|
||||
hal/nvlink/link_mode_transitions_tu104.o
|
||||
|
||||
nvgpu-y += \
|
||||
common/device.o \
|
||||
common/utils/enabled.o \
|
||||
common/utils/errata.o \
|
||||
common/utils/rbtree.o \
|
||||
common/utils/string.o \
|
||||
common/utils/worker.o \
|
||||
common/swdebug/profile.o \
|
||||
common/ptimer/ptimer.o \
|
||||
common/perf/perfbuf.o \
|
||||
common/therm/therm.o \
|
||||
common/mc/mc.o \
|
||||
common/sync/channel_sync.o \
|
||||
common/sync/channel_sync_semaphore.o \
|
||||
common/semaphore/semaphore_sea.o \
|
||||
common/semaphore/semaphore_pool.o \
|
||||
common/semaphore/semaphore_hw.o \
|
||||
common/semaphore/semaphore.o \
|
||||
common/regops/regops.o \
|
||||
common/ltc/ltc.o \
|
||||
common/fb/fb.o \
|
||||
common/fbp/fbp.o \
|
||||
common/gr/gr_utils.o \
|
||||
common/gr/gr.o \
|
||||
common/gr/gr_intr.o \
|
||||
common/gr/global_ctx.o \
|
||||
common/gr/ctx.o \
|
||||
common/gr/gr_falcon.o \
|
||||
common/gr/subctx.o \
|
||||
common/gr/zcull.o \
|
||||
common/gr/gr_config.o \
|
||||
common/gr/zbc.o \
|
||||
common/gr/gr_setup.o \
|
||||
common/gr/hwpm_map.o \
|
||||
common/gr/obj_ctx.o \
|
||||
common/gr/fs_state.o \
|
||||
common/gr/gr_ecc.o \
|
||||
common/netlist/netlist.o \
|
||||
common/init/nvgpu_init.o \
|
||||
common/pmu/pmu.o \
|
||||
common/pmu/allocator.o \
|
||||
common/pmu/pmu_mutex.o \
|
||||
common/pmu/fw/fw.o \
|
||||
common/pmu/fw/fw_ver_ops.o \
|
||||
common/pmu/fw/fw_ns_bootstrap.o \
|
||||
common/pmu/pg/pmu_pg.o \
|
||||
common/pmu/pg/pmu_aelpg.o \
|
||||
common/pmu/perfmon/pmu_perfmon.o \
|
||||
common/pmu/perfmon/pmu_perfmon_sw_gm20b.o \
|
||||
common/pmu/perfmon/pmu_perfmon_sw_gv11b.o \
|
||||
common/pmu/pmu_debug.o \
|
||||
common/pmu/pg/pg_sw_gm20b.o \
|
||||
common/pmu/pg/pg_sw_gp10b.o \
|
||||
common/pmu/pg/pg_sw_gp106.o \
|
||||
common/pmu/pg/pg_sw_gv11b.o \
|
||||
common/pmu/ipc/pmu_cmd.o \
|
||||
common/pmu/ipc/pmu_msg.o \
|
||||
common/pmu/ipc/pmu_queue.o \
|
||||
common/pmu/ipc/pmu_seq.o \
|
||||
common/acr/acr.o \
|
||||
common/acr/acr_wpr.o \
|
||||
common/acr/acr_blob_alloc.o \
|
||||
common/acr/acr_blob_construct_v0.o \
|
||||
common/acr/acr_blob_construct.o \
|
||||
common/acr/acr_bootstrap.o \
|
||||
common/acr/acr_sw_gm20b.o \
|
||||
common/acr/acr_sw_gp10b.o \
|
||||
common/acr/acr_sw_gv11b.o \
|
||||
common/sbr/sbr.o \
|
||||
common/pmu/super_surface/super_surface.o \
|
||||
common/pmu/lsfm/lsfm.o \
|
||||
common/pmu/lsfm/lsfm_sw_gm20b.o \
|
||||
common/pmu/lsfm/lsfm_sw_gp10b.o \
|
||||
common/pmu/lsfm/lsfm_sw_gv100.o \
|
||||
common/pmu/lsfm/lsfm_sw_tu104.o \
|
||||
common/pmu/perf/vfe_var.o \
|
||||
common/pmu/perf/vfe_equ.o \
|
||||
common/pmu/perf/perf.o \
|
||||
common/pmu/perf/change_seq.o \
|
||||
common/pmu/perf/pstate.o \
|
||||
common/pmu/pmgr/pwrdev.o \
|
||||
common/pmu/pmgr/pmgr.o \
|
||||
common/pmu/pmgr/pmgrpmu.o \
|
||||
common/pmu/pmgr/pwrmonitor.o \
|
||||
common/pmu/pmgr/pwrpolicy.o \
|
||||
common/pmu/volt/volt.o \
|
||||
common/pmu/volt/volt_rail.o \
|
||||
common/pmu/volt/volt_dev.o \
|
||||
common/pmu/volt/volt_policy.o \
|
||||
common/pmu/therm/thrm.o \
|
||||
common/pmu/therm/therm_dev.o \
|
||||
common/pmu/therm/therm_channel.o \
|
||||
common/pmu/lpwr/rppg.o \
|
||||
common/pmu/pmu_pstate.o \
|
||||
common/pmu/pmu_rtos_init.o \
|
||||
common/pmu/clk/clk_vin.o \
|
||||
common/pmu/clk/clk_fll.o \
|
||||
common/pmu/clk/clk_domain.o \
|
||||
common/pmu/clk/clk_prog.o \
|
||||
common/pmu/clk/clk_vf_point.o \
|
||||
common/pmu/clk/clk.o \
|
||||
common/pmu/boardobj/boardobj.o \
|
||||
common/pmu/boardobj/boardobjgrp.o \
|
||||
common/pmu/boardobj/boardobjgrpmask.o \
|
||||
common/pmu/boardobj/boardobjgrp_e255.o \
|
||||
common/pmu/boardobj/boardobjgrp_e32.o \
|
||||
common/clk_arb/clk_arb.o \
|
||||
common/clk_arb/clk_arb_gp10b.o \
|
||||
common/rc/rc.o \
|
||||
common/grmgr/grmgr.o \
|
||||
common/cic/cic.o \
|
||||
common/cic/cic_intr.o \
|
||||
common/cic/ce_cic.o \
|
||||
common/cic/ctxsw_cic.o \
|
||||
common/cic/ecc_cic.o \
|
||||
common/cic/host_cic.o \
|
||||
common/cic/gr_cic.o \
|
||||
common/cic/pri_cic.o \
|
||||
common/cic/pmu_cic.o \
|
||||
common/cic/mmu_cic.o \
|
||||
common/cic/msg_cic.o \
|
||||
hal/bus/bus_gk20a.o \
|
||||
hal/class/class_gm20b.o \
|
||||
hal/class/class_gp10b.o \
|
||||
hal/clk/clk_gm20b.o \
|
||||
hal/gr/ecc/ecc_gp10b.o \
|
||||
hal/gr/ecc/ecc_gv11b.o \
|
||||
hal/gr/zcull/zcull_gm20b.o \
|
||||
hal/gr/zcull/zcull_gv11b.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gp10b.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gv11b.o \
|
||||
hal/gr/init/gr_init_gm20b.o \
|
||||
hal/gr/init/gr_init_gp10b.o \
|
||||
hal/gr/init/gr_init_gv11b.o \
|
||||
hal/gr/intr/gr_intr_gm20b.o \
|
||||
hal/gr/intr/gr_intr_gp10b.o \
|
||||
hal/gr/hwpm_map/hwpm_map_gv100.o \
|
||||
hal/gr/zbc/zbc_gm20b.o \
|
||||
hal/gr/zbc/zbc_gp10b.o \
|
||||
hal/gr/zbc/zbc_gv11b.o \
|
||||
hal/gr/gr/gr_gk20a.o \
|
||||
hal/gr/gr/gr_gm20b.o \
|
||||
hal/gr/gr/gr_gp10b.o \
|
||||
hal/gr/gr/gr_gv100.o \
|
||||
hal/gr/gr/gr_gv11b.o \
|
||||
hal/gr/gr/gr_tu104.o \
|
||||
hal/init/hal_gv11b.o \
|
||||
hal/init/hal_gv11b_litter.o \
|
||||
hal/init/hal_init.o \
|
||||
hal/perf/perf_gv11b.o \
|
||||
hal/perf/perf_tu104.o \
|
||||
hal/power_features/cg/gp10b_gating_reglist.o \
|
||||
hal/power_features/cg/gv11b_gating_reglist.o \
|
||||
hal/regops/regops_gv11b.o \
|
||||
hal/regops/allowlist_gv11b.o \
|
||||
hal/ce/ce2_gk20a.o \
|
||||
hal/therm/therm_gp10b.o \
|
||||
hal/therm/therm_tu104.o \
|
||||
hal/gr/falcon/gr_falcon_gm20b.o \
|
||||
hal/ltc/ltc_gp10b.o \
|
||||
hal/ltc/intr/ltc_intr_gm20b.o \
|
||||
hal/ltc/intr/ltc_intr_gp10b.o \
|
||||
hal/fb/fb_gm20b.o \
|
||||
hal/fb/fb_gp10b.o \
|
||||
hal/fb/fb_gp106.o \
|
||||
hal/fb/fb_gv11b.o \
|
||||
hal/fb/intr/fb_intr_ecc_gv11b.o \
|
||||
hal/fuse/fuse_gm20b.o \
|
||||
hal/fifo/fifo_gk20a.o \
|
||||
hal/fifo/preempt_gk20a.o \
|
||||
hal/fifo/ramfc_gk20a.o \
|
||||
hal/fifo/ramfc_gp10b.o \
|
||||
hal/fifo/ramin_gk20a.o \
|
||||
hal/fifo/ramin_gp10b.o \
|
||||
hal/fifo/runlist_fifo_gv11b.o \
|
||||
hal/fifo/channel_gk20a.o \
|
||||
hal/fifo/channel_gm20b.o \
|
||||
hal/fifo/tsg_gk20a.o \
|
||||
hal/fifo/userd_gk20a.o \
|
||||
hal/fifo/userd_gv11b.o \
|
||||
hal/fifo/fifo_intr_gk20a.o \
|
||||
hal/fifo/ctxsw_timeout_gk20a.o \
|
||||
hal/netlist/netlist_gp10b.o \
|
||||
hal/sync/sema_cmdbuf_gk20a.o \
|
||||
hal/sync/sema_cmdbuf_gv11b.o \
|
||||
hal/pmu/pmu_gk20a.o \
|
||||
hal/pmu/pmu_gm20b.o \
|
||||
hal/pmu/pmu_gp10b.o \
|
||||
hal/pmu/pmu_gv11b.o \
|
||||
hal/pmu/pmu_tu104.o \
|
||||
hal/top/top_gp106.o \
|
||||
hal/top/top_gp10b.o \
|
||||
hal/tpc/tpc_gv11b.o \
|
||||
hal/priv_ring/priv_ring_gv11b.o \
|
||||
hal/cic/cic_gv11b_fusa.o \
|
||||
hal/cic/cic_lut_gv11b_fusa.o
|
||||
|
||||
# Linux specific parts of nvgpu.
|
||||
nvgpu-y += \
|
||||
os/linux/os_ops.o \
|
||||
os/linux/os_ops_gm20b.o \
|
||||
os/linux/os_ops_gp10b.o \
|
||||
os/linux/os_ops_gv11b.o \
|
||||
os/linux/kmem.o \
|
||||
os/linux/timers.o \
|
||||
os/linux/ioctl.o \
|
||||
os/linux/ioctl_ctrl.o \
|
||||
os/linux/ioctl_as.o \
|
||||
os/linux/ioctl_channel.o \
|
||||
os/linux/ioctl_tsg.o \
|
||||
os/linux/ioctl_dbg.o \
|
||||
os/linux/ioctl_prof.o \
|
||||
os/linux/ioctl_clk_arb.o \
|
||||
os/linux/cond.o \
|
||||
os/linux/nvgpu_mem.o \
|
||||
os/linux/linux-dma.o \
|
||||
os/linux/driver_common.o \
|
||||
os/linux/firmware.o \
|
||||
os/linux/thread.o \
|
||||
os/linux/vm.o \
|
||||
os/linux/intr.o \
|
||||
os/linux/sysfs.o \
|
||||
os/linux/linux-io.o \
|
||||
os/linux/io_usermode.o \
|
||||
os/linux/rwsem.o \
|
||||
os/linux/sched.o \
|
||||
os/linux/linux-channel.o \
|
||||
os/linux/sim.o \
|
||||
os/linux/sim_pci.o \
|
||||
os/linux/os_sched.o \
|
||||
os/linux/dt.o \
|
||||
os/linux/ecc_sysfs.o \
|
||||
os/linux/bsearch.o \
|
||||
os/linux/cic/cic_stub.o \
|
||||
os/linux/dmabuf_priv.o \
|
||||
os/linux/power_ops.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_VPR) += os/linux/vpr.o
|
||||
|
||||
nvgpu-$(CONFIG_DEBUG_FS) += \
|
||||
os/linux/debug.o \
|
||||
os/linux/debug_gr.o \
|
||||
os/linux/debug_fifo.o \
|
||||
os/linux/debug_ce.o \
|
||||
os/linux/debug_pmu.o \
|
||||
os/linux/debug_pmgr.o \
|
||||
os/linux/debug_sched.o \
|
||||
os/linux/debug_allocator.o \
|
||||
os/linux/debug_hal.o \
|
||||
os/linux/debug_clk_gm20b.o \
|
||||
os/linux/debug_ltc.o \
|
||||
os/linux/debug_volt.o \
|
||||
os/linux/debug_s_param.o \
|
||||
os/linux/swprofile_debugfs.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_LOGGING) += os/linux/log.o
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y)
|
||||
nvgpu-$(CONFIG_DEBUG_FS) += \
|
||||
os/linux/debug_kmem.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_FECS_TRACE) += \
|
||||
common/gr/fecs_trace.o \
|
||||
hal/gr/fecs_trace/fecs_trace_gm20b.o \
|
||||
hal/gr/fecs_trace/fecs_trace_gv11b.o \
|
||||
os/linux/fecs_trace_linux.o
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_FECS_TRACE),y)
|
||||
nvgpu-$(CONFIG_DEBUG_FS) += \
|
||||
os/linux/debug_fecs_trace.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A) += \
|
||||
os/linux/fuse.o \
|
||||
os/linux/module.o \
|
||||
os/linux/module_usermode.o \
|
||||
os/linux/platform_gk20a_tegra.o \
|
||||
os/linux/platform_gp10b_tegra.o \
|
||||
os/linux/platform_gv11b_tegra.o
|
||||
|
||||
ifeq ($(CONFIG_TEGRA_GK20A),y)
|
||||
nvgpu-$(CONFIG_NVGPU_TEGRA_FUSE) += os/linux/soc.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_SYNCFD_ANDROID) += \
|
||||
os/linux/sync_sema_android.o \
|
||||
os/linux/os_fence_android.o \
|
||||
os/linux/os_fence_android_sema.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_SYNCFD_STABLE) += \
|
||||
os/linux/sync_sema_dma.o \
|
||||
os/linux/os_fence_dma.o \
|
||||
os/linux/os_fence_dma_sema.o
|
||||
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \
|
||||
common/sync/channel_sync_syncpt.o \
|
||||
common/fence/fence_syncpt.o
|
||||
ifneq ($(CONFIG_NVGPU_SYNCFD_NONE),y)
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \
|
||||
os/linux/os_fence_syncpt.o
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y)
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \
|
||||
os/linux/nvhost_host1x.o
|
||||
else
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \
|
||||
os/linux/nvhost.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_TEGRA_GK20A_NVHOST) += \
|
||||
os/linux/nvhost_common.o \
|
||||
hal/sync/syncpt_cmdbuf_gk20a.o \
|
||||
hal/sync/syncpt_cmdbuf_gv11b.o \
|
||||
hal/sync/syncpt_cmdbuf_gv11b_fusa.o \
|
||||
common/sync/channel_user_syncpt.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \
|
||||
os/linux/vgpu/platform_vgpu_tegra.o \
|
||||
os/linux/vgpu/sysfs_vgpu.o \
|
||||
os/linux/vgpu/vgpu_ivc.o \
|
||||
os/linux/vgpu/vgpu_ivm.o \
|
||||
os/linux/vgpu/vgpu_linux.o \
|
||||
os/linux/vgpu/gv11b/platform_gv11b_vgpu_tegra.o
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_FECS_TRACE),y)
|
||||
nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \
|
||||
os/linux/vgpu/fecs_trace_vgpu_linux.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_COMMON_CLK) += \
|
||||
os/linux/clk.o
|
||||
|
||||
nvgpu-$(CONFIG_GK20A_DEVFREQ) += \
|
||||
os/linux/scale.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
|
||||
os/linux/cde.o \
|
||||
os/linux/cde_gm20b.o \
|
||||
os/linux/cde_gp10b.o
|
||||
|
||||
ifeq ($(CONFIG_DEBUG_FS),y)
|
||||
nvgpu-$(CONFIG_NVGPU_SUPPORT_CDE) += \
|
||||
os/linux/debug_cde.o
|
||||
endif
|
||||
|
||||
nvgpu-y += \
|
||||
common/mm/allocators/nvgpu_allocator.o \
|
||||
common/mm/allocators/bitmap_allocator.o \
|
||||
common/mm/allocators/buddy_allocator.o \
|
||||
common/mm/gmmu/page_table.o \
|
||||
common/mm/gmmu/pd_cache.o \
|
||||
common/mm/gmmu/pte.o \
|
||||
common/mm/as.o \
|
||||
common/mm/vm.o \
|
||||
common/mm/vm_area.o \
|
||||
common/mm/nvgpu_mem.o \
|
||||
common/mm/nvgpu_sgt.o \
|
||||
common/mm/mm.o \
|
||||
common/mm/dma.o \
|
||||
common/vbios/bios.o \
|
||||
common/falcon/falcon.o \
|
||||
common/falcon/falcon_sw_gk20a.o \
|
||||
common/engine_queues/engine_mem_queue.o \
|
||||
common/engine_queues/engine_dmem_queue.o \
|
||||
common/engine_queues/engine_fb_queue.o \
|
||||
common/io/io.o \
|
||||
common/power_features/power_features.o \
|
||||
common/power_features/cg/cg.o \
|
||||
common/power_features/pg/pg.o \
|
||||
common/sim/sim.o \
|
||||
common/sim/sim_pci.o \
|
||||
common/sim/sim_netlist.o \
|
||||
common/fifo/fifo.o \
|
||||
common/fifo/preempt.o \
|
||||
common/fifo/channel.o \
|
||||
common/fifo/channel_wdt.o \
|
||||
common/fifo/channel_worker.o \
|
||||
common/fifo/pbdma.o \
|
||||
common/fifo/submit.o \
|
||||
common/fifo/job.o \
|
||||
common/fifo/priv_cmdbuf.o \
|
||||
common/fifo/tsg.o \
|
||||
common/fifo/runlist.o \
|
||||
common/fifo/engine_status.o \
|
||||
common/fifo/engines.o \
|
||||
common/fifo/pbdma_status.o \
|
||||
common/fifo/userd.o \
|
||||
common/fifo/watchdog.o \
|
||||
common/fence/fence.o \
|
||||
common/fence/fence_sema.o \
|
||||
common/ecc.o \
|
||||
common/log_common.o \
|
||||
common/ce/ce.o \
|
||||
common/debugger.o \
|
||||
common/profiler/profiler.o \
|
||||
common/profiler/pm_reservation.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_GR_VIRTUALIZATION) += \
|
||||
common/vgpu/ltc/ltc_vgpu.o \
|
||||
common/vgpu/fbp/fbp_vgpu.o \
|
||||
common/vgpu/fb/fb_vgpu.o \
|
||||
common/vgpu/gr/gr_vgpu.o \
|
||||
common/vgpu/gr/ctx_vgpu.o \
|
||||
common/vgpu/gr/subctx_vgpu.o \
|
||||
common/vgpu/top/top_vgpu.o \
|
||||
common/vgpu/fifo/fifo_vgpu.o \
|
||||
common/vgpu/fifo/channel_vgpu.o \
|
||||
common/vgpu/fifo/tsg_vgpu.o \
|
||||
common/vgpu/fifo/preempt_vgpu.o \
|
||||
common/vgpu/fifo/runlist_vgpu.o \
|
||||
common/vgpu/fifo/ramfc_vgpu.o \
|
||||
common/vgpu/fifo/userd_vgpu.o \
|
||||
common/vgpu/ce_vgpu.o \
|
||||
common/vgpu/mm/mm_vgpu.o \
|
||||
common/vgpu/mm/vm_vgpu.o \
|
||||
common/vgpu/init/init_vgpu.o \
|
||||
common/vgpu/ivc/comm_vgpu.o \
|
||||
common/vgpu/intr/intr_vgpu.o \
|
||||
common/vgpu/ptimer/ptimer_vgpu.o \
|
||||
common/vgpu/debugger_vgpu.o \
|
||||
common/vgpu/pm_reservation_vgpu.o \
|
||||
common/vgpu/perf/perf_vgpu.o \
|
||||
common/vgpu/profiler/profiler_vgpu.o \
|
||||
common/vgpu/ecc_vgpu.o \
|
||||
common/vgpu/clk_vgpu.o \
|
||||
common/vgpu/gr/fecs_trace_vgpu.o \
|
||||
hal/vgpu/init/init_hal_vgpu.o \
|
||||
hal/vgpu/fifo/fifo_gv11b_vgpu.o \
|
||||
hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_CYCLESTATS) += \
|
||||
common/perf/cyclestats_snapshot.o \
|
||||
common/cyclestats/cyclestats.o
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y)
|
||||
nvgpu-$(CONFIG_NVGPU_CYCLESTATS) += \
|
||||
common/vgpu/perf/cyclestats_snapshot_vgpu.o
|
||||
nvgpu-$(CONFIG_NVGPU_COMPRESSION) += \
|
||||
common/vgpu/cbc/cbc_vgpu.o
|
||||
endif
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_COMPRESSION) += \
|
||||
os/linux/comptags.o \
|
||||
common/mm/comptags.o \
|
||||
common/cbc/cbc.o \
|
||||
hal/cbc/cbc_gm20b.o \
|
||||
hal/cbc/cbc_gp10b.o \
|
||||
hal/cbc/cbc_gv11b.o \
|
||||
hal/cbc/cbc_tu104.o
|
||||
|
||||
# FUSA (Functionally Safe) HAL source files
|
||||
nvgpu-y += \
|
||||
hal/mm/mm_gv11b_fusa.o \
|
||||
hal/mm/mm_gp10b_fusa.o \
|
||||
hal/mm/gmmu/gmmu_gv11b_fusa.o \
|
||||
hal/mm/gmmu/gmmu_gp10b_fusa.o \
|
||||
hal/mm/gmmu/gmmu_gk20a_fusa.o \
|
||||
hal/mm/gmmu/gmmu_gm20b_fusa.o \
|
||||
hal/mm/cache/flush_gk20a_fusa.o \
|
||||
hal/mm/cache/flush_gv11b_fusa.o \
|
||||
hal/mm/mmu_fault/mmu_fault_gv11b_fusa.o \
|
||||
hal/ltc/intr/ltc_intr_gp10b_fusa.o \
|
||||
hal/ltc/intr/ltc_intr_gv11b_fusa.o \
|
||||
hal/bus/bus_gk20a_fusa.o \
|
||||
hal/bus/bus_gm20b_fusa.o \
|
||||
hal/bus/bus_gp10b_fusa.o \
|
||||
hal/bus/bus_gv11b_fusa.o \
|
||||
hal/ce/ce_gp10b_fusa.o \
|
||||
hal/ce/ce_gv11b_fusa.o \
|
||||
hal/class/class_gv11b_fusa.o \
|
||||
hal/falcon/falcon_gk20a_fusa.o \
|
||||
hal/fb/fb_gm20b_fusa.o \
|
||||
hal/fb/fb_gv11b_fusa.o \
|
||||
hal/fb/fb_mmu_fault_gv11b_fusa.o \
|
||||
hal/fb/ecc/fb_ecc_gv11b_fusa.o \
|
||||
hal/fb/intr/fb_intr_ecc_gv11b_fusa.o \
|
||||
hal/fb/intr/fb_intr_gv11b_fusa.o \
|
||||
hal/fifo/channel_gk20a_fusa.o \
|
||||
hal/fifo/channel_gm20b_fusa.o \
|
||||
hal/fifo/channel_gv11b_fusa.o \
|
||||
hal/fifo/ctxsw_timeout_gv11b_fusa.o \
|
||||
hal/fifo/engine_status_gm20b_fusa.o \
|
||||
hal/fifo/engine_status_gv100_fusa.o \
|
||||
hal/fifo/engines_gp10b_fusa.o \
|
||||
hal/fifo/engines_gv11b_fusa.o \
|
||||
hal/fifo/fifo_gk20a_fusa.o \
|
||||
hal/fifo/fifo_gv11b_fusa.o \
|
||||
hal/fifo/fifo_intr_gk20a_fusa.o \
|
||||
hal/fifo/fifo_intr_gv11b_fusa.o \
|
||||
hal/fifo/pbdma_gm20b_fusa.o \
|
||||
hal/fifo/pbdma_gp10b_fusa.o \
|
||||
hal/fifo/pbdma_gv11b_fusa.o \
|
||||
hal/fifo/pbdma_status_gm20b_fusa.o \
|
||||
hal/fifo/preempt_gv11b_fusa.o \
|
||||
hal/fifo/ramfc_gp10b_fusa.o \
|
||||
hal/fifo/ramfc_gv11b_fusa.o \
|
||||
hal/fifo/ramin_gk20a_fusa.o \
|
||||
hal/fifo/ramin_gm20b_fusa.o \
|
||||
hal/fifo/ramin_gv11b_fusa.o \
|
||||
hal/fifo/runlist_fifo_gk20a_fusa.o \
|
||||
hal/fifo/runlist_fifo_gv11b_fusa.o \
|
||||
hal/fifo/runlist_ram_gv11b_fusa.o \
|
||||
hal/fifo/tsg_gk20a_fusa.o \
|
||||
hal/fifo/tsg_gv11b_fusa.o \
|
||||
hal/fifo/usermode_gv11b_fusa.o \
|
||||
hal/fuse/fuse_gm20b_fusa.o \
|
||||
hal/fuse/fuse_gp10b_fusa.o \
|
||||
hal/gr/config/gr_config_gm20b_fusa.o \
|
||||
hal/gr/config/gr_config_gv100_fusa.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gp10b_fusa.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gv11b_fusa.o \
|
||||
hal/gr/ecc/ecc_gv11b_fusa.o \
|
||||
hal/gr/falcon/gr_falcon_gm20b_fusa.o \
|
||||
hal/gr/falcon/gr_falcon_gp10b_fusa.o \
|
||||
hal/gr/falcon/gr_falcon_gv11b_fusa.o \
|
||||
hal/gr/init/gr_init_gm20b_fusa.o \
|
||||
hal/gr/init/gr_init_gp10b_fusa.o \
|
||||
hal/gr/init/gr_init_gv11b_fusa.o \
|
||||
hal/gr/intr/gr_intr_gm20b_fusa.o \
|
||||
hal/gr/intr/gr_intr_gp10b_fusa.o \
|
||||
hal/gr/intr/gr_intr_gv11b_fusa.o \
|
||||
hal/ltc/ltc_gm20b_fusa.o \
|
||||
hal/ltc/ltc_gp10b_fusa.o \
|
||||
hal/ltc/ltc_gv11b_fusa.o \
|
||||
hal/mc/mc_gm20b_fusa.o \
|
||||
hal/mc/mc_gp10b_fusa.o \
|
||||
hal/mc/mc_gv11b_fusa.o \
|
||||
hal/netlist/netlist_gv11b_fusa.o \
|
||||
hal/pmu/pmu_gk20a_fusa.o \
|
||||
hal/pmu/pmu_gv11b_fusa.o \
|
||||
hal/priv_ring/priv_ring_gm20b_fusa.o \
|
||||
hal/priv_ring/priv_ring_gp10b_fusa.o \
|
||||
hal/ptimer/ptimer_gk20a_fusa.o \
|
||||
hal/ptimer/ptimer_gp10b.o \
|
||||
hal/ptimer/ptimer_gv11b.o \
|
||||
hal/therm/therm_gv11b_fusa.o \
|
||||
hal/top/top_gm20b_fusa.o \
|
||||
hal/top/top_gv11b_fusa.o
|
||||
|
||||
nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
|
||||
hal/ce/ce_tu104.o \
|
||||
hal/clk/clk_gm20b.o \
|
||||
hal/init/hal_gp10b.o \
|
||||
hal/init/hal_gp10b_litter.o \
|
||||
hal/init/hal_gm20b.o \
|
||||
hal/init/hal_gm20b_litter.o \
|
||||
hal/fifo/engine_status_gm20b.o \
|
||||
hal/fifo/engines_gm20b.o \
|
||||
hal/fifo/pbdma_gm20b.o \
|
||||
hal/fifo/pbdma_gp10b.o \
|
||||
hal/fifo/mmu_fault_gk20a.o \
|
||||
hal/fifo/mmu_fault_gm20b.o \
|
||||
hal/fifo/mmu_fault_gp10b.o \
|
||||
hal/fifo/runlist_fifo_gk20a.o \
|
||||
hal/fifo/runlist_ram_gk20a.o \
|
||||
hal/gr/config/gr_config_gm20b.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \
|
||||
hal/gr/gr/gr_gk20a.o \
|
||||
hal/gr/gr/gr_gm20b.o \
|
||||
hal/gr/gr/gr_gp10b.o \
|
||||
hal/ltc/ltc_gm20b.o \
|
||||
hal/ltc/ltc_gm20b_dbg.o \
|
||||
hal/mc/mc_gm20b.o \
|
||||
hal/mm/cache/flush_gk20a.o \
|
||||
hal/mm/mm_gm20b.o \
|
||||
hal/mm/mm_gk20a.o \
|
||||
hal/mm/gmmu/gmmu_gk20a.o \
|
||||
hal/mm/gmmu/gmmu_gm20b.o \
|
||||
hal/falcon/falcon_gk20a.o \
|
||||
hal/netlist/netlist_gm20b.o \
|
||||
hal/perf/perf_gm20b.o \
|
||||
hal/power_features/cg/gm20b_gating_reglist.o \
|
||||
hal/priv_ring/priv_ring_gm20b.o \
|
||||
hal/regops/regops_gm20b.o \
|
||||
hal/regops/regops_gp10b.o \
|
||||
hal/regops/regops_tu104.o \
|
||||
hal/regops/allowlist_tu104.o \
|
||||
hal/therm/therm_gm20b.o \
|
||||
hal/top/top_gm20b.o
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y)
|
||||
nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
|
||||
hal/vgpu/init/vgpu_hal_gv11b.o
|
||||
endif
|
||||
drivers/gpu/nvgpu/Makefile.doxygen (new file, 53 lines)
@@ -0,0 +1,53 @@
##################### tell Emacs this is a -*- makefile-gmake -*-
#
# Copyright (c) 2019-2020 NVIDIA CORPORATION. All Rights Reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#
# Build the doxygen output.
#
# Some assumptions: this is a local build only. You need installed:
#
#   doxygen
#   python2
#
# You also must have $TOP/$TEGRA_TOP set; For L4T:
#
#   $ export TOP=$TEGRA_TOP
#
# Should work.
#

PYTHON2    = python2.7
ARCH       = $(TEGRA_TOP)/core-private/tools/arch/arch.py
NVGPU_ARCH = $(TOP)/kernel/nvgpu/arch
ARCH_YAML  = $(NVGPU_ARCH)/*.yaml

# Build the doxygen output. But make sure the sources file is generated
# first.
all: doxygen/sources
	doxygen doxygen/Doxyfile.safety

# Generate the sources file. This has a list of files that we shall include
# in the doxygen output.
doxygen/sources: $(ARCH) $(ARCH_YAML)
	$(ARCH) --arch-file $(NVGPU_ARCH)/nvgpu.yaml files \
		--safe --gpu igpu --gpu both \
		--prefix 'INPUT += ' > doxygen/Doxyfile.sources.safety
	@if [ ! -z "$(EXTRA_PATH)" ] ; then \
		$(ARCH) --arch-file $(NVGPU_ARCH)/nvgpu.yaml \
			--include-path $(EXTRA_PATH) files \
			--safe --gpu igpu --gpu both \
			--prefix 'INPUT += $(EXTRA_PATH)/' \
			>> doxygen/Doxyfile.sources.safety ; \
		cat $(EXTRA_PATH)/unit-tests/SWUTS.sources \
			>> doxygen/Doxyfile.sources.safety ; \
	fi

clean:
	rm -rf doxygen/Doxyfile.sources.safety
	rm -rf html
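A hedged usage sketch for this Makefile, assuming the TOP/TEGRA_TOP layout described in its header comment and running from the directory that holds the doxygen/ configuration (paths are illustrative, not from this commit):

	export TEGRA_TOP=/path/to/tegra-tree   # illustrative
	export TOP=$TEGRA_TOP
	make -f Makefile.doxygen               # 'all': generate the sources list, then run doxygen on Doxyfile.safety
	make -f Makefile.doxygen clean         # remove the generated sources list and the html output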
drivers/gpu/nvgpu/Makefile.linux.configs (new file, 225 lines)
@@ -0,0 +1,225 @@
|
||||
# Turn off all other configs if CONFIG_GK20A is not set
|
||||
CONFIG_GK20A := m
|
||||
|
||||
# Enable GK20A PMU features.
|
||||
CONFIG_GK20A_PMU := y
|
||||
|
||||
# Enable support for the GK20A graphics engine on Tegra
|
||||
# by adding a Tegra platform interface to the GK20A driver.
|
||||
CONFIG_TEGRA_GK20A := y
|
||||
|
||||
# Enable Support for Loading High Secure binary, and using
|
||||
# Write Protected Regions (WPR) for storing ucodes, and bootstrap
|
||||
# PMU, FECS and GPCCS in Low Secure mode.
|
||||
CONFIG_TEGRA_ACR := y
|
||||
|
||||
# Support for debugger APIs
|
||||
CONFIG_NVGPU_DEBUGGER := y
|
||||
|
||||
# Support for iGPU LS PMU enable/disable
|
||||
CONFIG_NVGPU_LS_PMU := y
|
||||
|
||||
# Enable/Disable NVGPU logging
|
||||
CONFIG_NVGPU_LOGGING := y
|
||||
|
||||
# Enable/Disable the support of HALs from chips that do not have functional
|
||||
# safety certification
|
||||
CONFIG_NVGPU_HAL_NON_FUSA := y
|
||||
|
||||
# Support recovery on failure (which may involve engine reset)
|
||||
CONFIG_NVGPU_RECOVERY := y
|
||||
|
||||
# Support for compression
|
||||
CONFIG_NVGPU_COMPRESSION := y
|
||||
|
||||
# Enable support for extraction of comptags for CDE.
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),y)
|
||||
CONFIG_NVGPU_SUPPORT_CDE := y
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_COMMON_CLK),y)
|
||||
ifeq ($(CONFIG_PM_DEVFREQ),y)
|
||||
# Select this entry to enable gk20a scaling
|
||||
CONFIG_GK20A_DEVFREQ := y
|
||||
|
||||
# Disable support to pass PM_QOS constraints to devfreq based scaling.
|
||||
CONFIG_GK20A_PM_QOS := n
|
||||
|
||||
endif
|
||||
endif
|
||||
|
||||
# Say Y here to allow nvgpu to track and keep statistics on
|
||||
# the system memory used by the driver. This does recreate
|
||||
# some of the kmem_leak tracking but this is also applicable
|
||||
# to other OSes which do not have Linux's kmem_leak.
|
||||
#CONFIG_NVGPU_TRACK_MEM_USAGE := n
|
||||
|
||||
# Enable the cycle stats debugging features.
|
||||
CONFIG_NVGPU_CYCLESTATS := y
|
||||
|
||||
# Enable support for the NVGPU Context Switch Tracing. In this mode,
|
||||
# FECS collects timestamps for contexts loaded on GR engine. This
|
||||
# allows tracking context switches on GR engine, as well as
|
||||
# identifying processes that submitted work.
|
||||
CONFIG_NVGPU_FECS_TRACE := y
|
||||
|
||||
# Enable support in GK20A for the nvhost (host1x) dma engine hardware
|
||||
# that includes things like hardware syncpts. This requires
|
||||
# TEGRA_GRHOST
|
||||
ifdef CONFIG_TEGRA_GRHOST
|
||||
CONFIG_TEGRA_GK20A_NVHOST := y
|
||||
endif
|
||||
ifdef CONFIG_TEGRA_HOST1X_NEXT
|
||||
CONFIG_TEGRA_GK20A_NVHOST := y
|
||||
CONFIG_TEGRA_GK20A_NVHOST_HOST1X := y
|
||||
endif
|
||||
|
||||
# Enable support for GPUs on PCIe bus.
|
||||
ifeq ($(CONFIG_PCI),y)
|
||||
# Support for NVGPU DGPU
|
||||
CONFIG_NVGPU_DGPU := y
|
||||
endif
|
||||
|
||||
# Enable nvgpu debug facility to redirect debug spew to ftrace. This
|
||||
# affects kernel memory use, so should not be enabled by default.
|
||||
ifeq ($(CONFIG_TRACING),y)
|
||||
#CONFIG_GK20A_TRACE_PRINTK := y
|
||||
endif
|
||||
|
||||
# Use tegra_alloc_fd() for allocating dma_buf fds. This allocates
|
||||
# the fds above 1024 which exempts them from counting against process
|
||||
# fd limit.
|
||||
ifeq ($(CONFIG_NV_TEGRA_MC),y)
|
||||
CONFIG_NVGPU_USE_TEGRA_ALLOC_FD := y
|
||||
endif
|
||||
|
||||
# Support Nvlink
|
||||
ifeq ($(CONFIG_TEGRA_NVLINK),y)
|
||||
CONFIG_NVGPU_NVLINK := y
|
||||
endif
|
||||
|
||||
# Support NVGPU Virtualization
|
||||
ifeq ($(CONFIG_TEGRA_GR_VIRTUALIZATION),y)
|
||||
CONFIG_NVGPU_GR_VIRTUALIZATION := y
|
||||
endif
|
||||
|
||||
# Support for NVGPU VPR
|
||||
ifeq ($(CONFIG_TEGRA_VPR),y)
|
||||
CONFIG_NVGPU_VPR := y
|
||||
endif
|
||||
|
||||
# Support Tegra fuse
|
||||
ifeq ($(CONFIG_TEGRA_KFUSE),y)
|
||||
CONFIG_NVGPU_TEGRA_FUSE := y
|
||||
endif
|
||||
|
||||
# GPU job synchronization (fences before and after submits) can use raw
|
||||
# syncpoints if available and sync fds if chosen. Without syncpoints,
|
||||
# nvgpu also provides semaphore-backed sync fds to userspace.
|
||||
#
|
||||
# Select which kernel-provided API is used for sync fds. Matching
|
||||
# support is required for the userspace drivers too.
|
||||
ifeq ($(CONFIG_SYNC),y)
|
||||
CONFIG_NVGPU_SYNCFD_ANDROID := y
|
||||
else ifeq ($(CONFIG_SYNC_FILE), y)
|
||||
CONFIG_NVGPU_SYNCFD_STABLE := y
|
||||
else
|
||||
CONFIG_NVGPU_SYNCFD_NONE := y
|
||||
endif
|
||||
|
||||
# The check below indicates the build is invoked from Nvidia's
|
||||
# internal build system.
|
||||
ifneq ($(NV_BUILD_KERNEL_OPTIONS),)
|
||||
|
||||
# Disable the below configs for kstable
|
||||
ifneq ($(findstring stable,$(NV_BUILD_KERNEL_OPTIONS)),)
|
||||
CONFIG_GK20A_DEVFREQ := n
|
||||
CONFIG_GK20A_PM_QOS := n
|
||||
else ifneq ($(filter 4.9 4.14,$(patsubst -,$(space),$(NV_BUILD_KERNEL_OPTIONS))),)
|
||||
# Enable support to pass PM_QOS constraints to devfreq based scaling.
|
||||
CONFIG_GK20A_PM_QOS := y
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_GK20A_PMU),y)
|
||||
ccflags-y += -DCONFIG_GK20A_PMU
|
||||
endif
|
||||
ifeq ($(CONFIG_TEGRA_GK20A),y)
|
||||
ccflags-y += -DCONFIG_TEGRA_GK20A
|
||||
endif
|
||||
ifeq ($(CONFIG_TEGRA_ACR),y)
|
||||
ccflags-y += -DCONFIG_TEGRA_ACR
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_DEBUGGER),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_DEBUGGER
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_LS_PMU),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_LS_PMU
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_LOGGING),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_LOGGING
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_HAL_NON_FUSA
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_RECOVERY),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_RECOVERY
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_COMPRESSION
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_SUPPORT_CDE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_SUPPORT_CDE
|
||||
endif
|
||||
ifeq ($(CONFIG_GK20A_DEVFREQ),y)
|
||||
ccflags-y += -DCONFIG_GK20A_DEVFREQ
|
||||
endif
|
||||
ifeq ($(CONFIG_GK20A_PM_QOS),y)
|
||||
ccflags-y += -DCONFIG_GK20A_PM_QOS
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_TRACK_MEM_USAGE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_TRACK_MEM_USAGE
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_CYCLESTATS),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_CYCLESTATS
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_FECS_TRACE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_FECS_TRACE
|
||||
endif
|
||||
ifeq ($(CONFIG_TEGRA_GK20A_NVHOST),y)
|
||||
ccflags-y += -DCONFIG_TEGRA_GK20A_NVHOST
|
||||
endif
|
||||
ifeq ($(CONFIG_TEGRA_GK20A_NVHOST_HOST1X),y)
|
||||
ccflags-y += -DCONFIG_TEGRA_GK20A_NVHOST_HOST1X
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_DGPU
|
||||
endif
|
||||
ifeq ($(CONFIG_GK20A_TRACE_PRINTK),y)
|
||||
ccflags-y += -DCONFIG_GK20A_TRACE_PRINTK
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_USE_TEGRA_ALLOC_FD),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_USE_TEGRA_ALLOC_FD
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_NVLINK),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_NVLINK
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_GR_VIRTUALIZATION),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_GR_VIRTUALIZATION
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_VPR),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_VPR
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_TEGRA_FUSE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_TEGRA_FUSE
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_SYNCFD_ANDROID),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_SYNCFD_ANDROID
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_SYNCFD_STABLE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_SYNCFD_STABLE
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_SYNCFD_NONE),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_SYNCFD_NONE
|
||||
endif
|
||||
drivers/gpu/nvgpu/Makefile.shared.configs (new file, 319 lines)
@@ -0,0 +1,319 @@
|
||||
#
|
||||
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
# This file defines the make variables and cflags applicable to nvgpu common
|
||||
# shared by QNX, nvgpu userspace, and test builds. Note that cflags are added to
|
||||
# variable NVGPU_COMMON_CFLAGS that needs to be used by the parent Makefile
|
||||
# to update corresponding cflags variable.
|
||||
|
||||
# Default is the regular profile. That can be overridden if necessary by
|
||||
# setting the NVGPU_FORCE_SAFETY_PROFILE. This is a useful hack while we
|
||||
# wait for the userspace tmake build to make its way into a proper safety
|
||||
# profile build.
|
||||
profile := default
|
||||
|
||||
# Decide whether to use the safety release, safety debug or the regular profile.
|
||||
ifeq ($(NV_BUILD_CONFIGURATION_IS_SAFETY),1)
|
||||
profile := safety_release
|
||||
ifeq ($(NV_BUILD_CONFIGURATION_IS_DEBUG),1)
|
||||
profile := safety_debug
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NVGPU_FORCE_SAFETY_PROFILE),1)
|
||||
profile := safety_release
|
||||
ifeq ($(NVGPU_FORCE_DEBUG_PROFILE),1)
|
||||
profile := safety_debug
|
||||
endif
|
||||
endif
|
||||
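# Editor's note (hedged): a minimal sketch of selecting a profile from a parent
# build, using only the variables defined above (the surrounding build-system
# invocation is assumed, not defined in this file):
#   (nothing set)                                             -> profile = default
#   NVGPU_FORCE_SAFETY_PROFILE=1                              -> profile = safety_release
#   NVGPU_FORCE_SAFETY_PROFILE=1 NVGPU_FORCE_DEBUG_PROFILE=1  -> profile = safety_debug
# e.g. when the parent Makefile includes this file:
#   make NVGPU_FORCE_SAFETY_PROFILE=1 NVGPU_FORCE_DEBUG_PROFILE=1 <target>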
|
||||
NVGPU_COMMON_CFLAGS :=
|
||||
|
||||
#
|
||||
# Flags always enabled regardless of build profile.
|
||||
#
|
||||
|
||||
NVGPU_COMMON_CFLAGS += \
|
||||
-DCONFIG_TEGRA_GK20A_PMU=1 \
|
||||
-DCONFIG_TEGRA_ACR=1 \
|
||||
-DCONFIG_NVGPU_GR_VIRTUALIZATION \
|
||||
-DCONFIG_PCI_MSI
|
||||
|
||||
CONFIG_NVGPU_LOGGING := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LOGGING
|
||||
|
||||
# Syncpoint support provided by nvhost is expected to exist.
|
||||
CONFIG_TEGRA_GK20A_NVHOST := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_TEGRA_GK20A_NVHOST
|
||||
|
||||
# Syncfds are a Linux feature.
|
||||
CONFIG_NVGPU_SYNCFD_NONE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SYNCFD_NONE
|
||||
|
||||
CONFIG_NVGPU_GRAPHICS := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GRAPHICS
|
||||
|
||||
ifeq ($(profile),$(filter $(profile),safety_debug safety_release))
|
||||
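# Editor's note (hedged): the $(filter ...) comparison above is the usual GNU
# make idiom for testing that $(profile) is one of the listed words: $(filter)
# returns $(profile) only when it matches safety_debug or safety_release, so
# the ifeq holds for those two profiles and fails for the default profile.
# Standalone illustration, not part of this file:
#   profile := safety_debug
#   ifeq ($(profile),$(filter $(profile),safety_debug safety_release))
#   $(info safety profile selected)
#   endif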
|
||||
# Enable golden context verification only for safety debug/release build
|
||||
NVGPU_COMMON_CFLAGS += \
|
||||
-DCONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION \
|
||||
-DCONFIG_NVGPU_BUILD_CONFIGURATION_IS_SAFETY
|
||||
|
||||
## For testing of CTXSW FW error codes manually, enable the configs below in safety build.
|
||||
## Enable only one config at a time, because only one error can be tested at a time.
|
||||
# NVGPU_COMMON_CFLAGS += \
|
||||
# -DCONFIG_NVGPU_CTXSW_FW_ERROR_WDT_TESTING
|
||||
#
|
||||
# NVGPU_COMMON_CFLAGS += \
|
||||
# -DCONFIG_NVGPU_CTXSW_FW_ERROR_CODE_TESTING
|
||||
#
|
||||
# NVGPU_COMMON_CFLAGS += \
|
||||
# -DCONFIG_NVGPU_CTXSW_FW_ERROR_HEADER_TESTING
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),1)
|
||||
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DGPU
|
||||
|
||||
CONFIG_NVGPU_NVLINK := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NVLINK
|
||||
|
||||
# used by sec2 code
|
||||
CONFIG_NVGPU_ENGINE_QUEUE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_QUEUE
|
||||
|
||||
# used in ce_app
|
||||
CONFIG_NVGPU_FENCE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FENCE
|
||||
|
||||
# ce_app uses syncpt (nvgpu_nvhost_syncpt_wait_timeout_ext)
|
||||
CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
|
||||
CONFIG_NVGPU_FALCON_NON_FUSA := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA
|
||||
|
||||
CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
|
||||
|
||||
CONFIG_NVGPU_SM_DIVERSITY := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SM_DIVERSITY
|
||||
|
||||
CONFIG_NVGPU_USE_3LSS_ERR_INJECTION := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_3LSS_ERR_INJECTION
|
||||
|
||||
CONFIG_NVGPU_LS_PMU := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LS_PMU
|
||||
|
||||
CONFIG_NVGPU_CLK_ARB := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
CONFIG_NVGPU_TEGRA_FUSE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TEGRA_FUSE
|
||||
|
||||
#
|
||||
# Flags enabled only for safety debug and regular build profile.
|
||||
#
|
||||
ifneq ($(profile),safety_release)
|
||||
|
||||
CONFIG_NVGPU_TRACE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TRACE
|
||||
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
|
||||
|
||||
#
|
||||
# Flags enabled only for regular build profile.
|
||||
#
|
||||
ifneq ($(profile),safety_debug)
|
||||
|
||||
CONFIG_NVGPU_SYSFS := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SYSFS
|
||||
|
||||
# ACR feature to enable old tegra ACR profile support
|
||||
CONFIG_NVGPU_ACR_LEGACY := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ACR_LEGACY
|
||||
|
||||
CONFIG_NVGPU_ENGINE_QUEUE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_QUEUE
|
||||
|
||||
CONFIG_NVGPU_DEBUGGER := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DEBUGGER
|
||||
|
||||
CONFIG_NVGPU_PROFILER := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_PROFILER
|
||||
|
||||
CONFIG_NVGPU_RECOVERY := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_RECOVERY
|
||||
|
||||
CONFIG_NVGPU_CILP := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CILP
|
||||
|
||||
CONFIG_NVGPU_GFXP := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GFXP
|
||||
|
||||
CONFIG_NVGPU_CYCLESTATS := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CYCLESTATS
|
||||
|
||||
CONFIG_NVGPU_FECS_TRACE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FECS_TRACE
|
||||
|
||||
ifneq ($(CONFIG_NVGPU_DGPU),1)
|
||||
CONFIG_NVGPU_IGPU_VIRT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IGPU_VIRT
|
||||
endif
|
||||
|
||||
# Enable the usage of 3LSS error injection features.
|
||||
CONFIG_NVGPU_USE_3LSS_ERR_INJECTION := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_3LSS_ERR_INJECTION
|
||||
|
||||
# Enable nvlink support for normal build.
|
||||
CONFIG_NVGPU_NVLINK := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NVLINK
|
||||
|
||||
# Enable tpc_powergate support for normal build.
|
||||
CONFIG_NVGPU_TPC_POWERGATE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TPC_POWERGATE
|
||||
|
||||
# Enable mssnvlink0 reset control for normal build
|
||||
CONFIG_MSSNVLINK0_RST_CONTROL := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_MSSNVLINK0_RST_CONTROL
|
||||
|
||||
# Enable dgpu support for normal build.
|
||||
CONFIG_NVGPU_DGPU := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DGPU
|
||||
|
||||
# Enable nvgpu_next for normal build
|
||||
ifneq ($(NV_BUILD_CONFIGURATION_IS_EXTERNAL), 1)
|
||||
CONFIG_NVGPU_NEXT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NEXT
|
||||
endif
|
||||
|
||||
CONFIG_NVGPU_VPR := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_VPR
|
||||
|
||||
CONFIG_NVGPU_REPLAYABLE_FAULT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_REPLAYABLE_FAULT
|
||||
|
||||
# Enable LS PMU support for normal build
|
||||
CONFIG_NVGPU_LS_PMU := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LS_PMU
|
||||
|
||||
# Enable elpg support for normal build
|
||||
CONFIG_NVGPU_POWER_PG := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_POWER_PG
|
||||
|
||||
# Enable sim support for normal build
|
||||
CONFIG_NVGPU_SIM := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SIM
|
||||
|
||||
CONFIG_NVGPU_COMPRESSION := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_COMPRESSION
|
||||
|
||||
# Enable non FUSA HALs for normal build
|
||||
CONFIG_NVGPU_HAL_NON_FUSA := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_HAL_NON_FUSA
|
||||
|
||||
# Enable non FUSA common code for normal build
|
||||
CONFIG_NVGPU_NON_FUSA := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_NON_FUSA
|
||||
|
||||
CONFIG_NVGPU_CLK_ARB := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CLK_ARB
|
||||
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA
|
||||
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
||||
|
||||
CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
|
||||
|
||||
CONFIG_NVGPU_SET_FALCON_ACCESS_MAP := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SET_FALCON_ACCESS_MAP
|
||||
|
||||
# Enable SW Semaphore for normal build
|
||||
CONFIG_NVGPU_SW_SEMAPHORE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SW_SEMAPHORE
|
||||
|
||||
# Enable Channel WDT for safety build until we switch to user mode submits only
|
||||
CONFIG_NVGPU_CHANNEL_WDT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT
|
||||
|
||||
# Enable Kernel Mode submit for safety build until we switch to user mode
|
||||
# submits only
|
||||
CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
|
||||
# Enable fences for safety build until we switch to user mode submits only
|
||||
CONFIG_NVGPU_FENCE := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FENCE
|
||||
|
||||
# Enable powergate lib for normal build
|
||||
CONFIG_NVGPU_USE_POWERGATE_LIB := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USE_POWERGATE_LIB
|
||||
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
|
||||
# Enable dynamic busy/idle support
|
||||
CONFIG_NVGPU_DYNAMIC_BUSY_IDLE_SUPPORT := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_DYNAMIC_BUSY_IDLE_SUPPORT
|
||||
|
||||
# Enable HW based error injection support
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_INJECT_HWERR
|
||||
|
||||
# Enable Channel/TSG Scheduling
|
||||
CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
||||
|
||||
# Enable Channel/TSG Control
|
||||
CONFIG_NVGPU_CHANNEL_TSG_CONTROL := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
|
||||
|
||||
# Enable Virtualization server for normal build
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_TEGRA_GR_VIRTUALIZATION_SERVER
|
||||
|
||||
# Enable SM diversity support for normal build
|
||||
CONFIG_NVGPU_SM_DIVERSITY := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_SM_DIVERSITY
|
||||
|
||||
# Enable Multi Instance GPU support for normal build
|
||||
CONFIG_NVGPU_MIG := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_MIG
|
||||
|
||||
endif
|
||||
endif
|
||||
|
||||
# Enable USERD only if kernel mode submit is supported
|
||||
ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1)
|
||||
CONFIG_NVGPU_USERD := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USERD
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DEBUGGER),1)
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_RESET
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_RECOVERY),1)
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_ENGINE_RESET
|
||||
endif
|
||||
drivers/gpu/nvgpu/Makefile.sources (new file, 711 lines)
@@ -0,0 +1,711 @@
|
||||
# -*- mode: makefile -*-
|
||||
#
|
||||
# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
srcs :=
|
||||
|
||||
ifdef NVGPU_POSIX
|
||||
srcs += os/posix/nvgpu.c \
|
||||
os/posix/posix-io.c \
|
||||
os/posix/mock-registers.c \
|
||||
os/posix/posix-nvgpu_mem.c \
|
||||
os/posix/posix-dma.c \
|
||||
os/posix/posix-vm.c \
|
||||
os/posix/firmware.c \
|
||||
os/posix/soc.c \
|
||||
os/posix/error_notifier.c \
|
||||
os/posix/posix-channel.c \
|
||||
os/posix/posix-tsg.c \
|
||||
os/posix/stubs.c \
|
||||
os/posix/posix-nvhost.c \
|
||||
os/posix/posix-vgpu.c \
|
||||
os/posix/posix-dt.c \
|
||||
os/posix/fuse.c
|
||||
|
||||
ifdef CONFIG_NVGPU_VPR
|
||||
srcs += os/posix/posix-vpr.c
|
||||
endif
|
||||
|
||||
ifdef CONFIG_NVGPU_FECS_TRACE
|
||||
srcs += os/posix/fecs_trace_posix.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_CLK_ARB),1)
|
||||
srcs += os/posix/posix-clk_arb.c
|
||||
endif
|
||||
|
||||
ifdef CONFIG_NVGPU_NVLINK
|
||||
srcs += os/posix/posix-nvlink.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),1)
|
||||
srcs += os/posix/posix-comptags.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_LOGGING),1)
|
||||
srcs += os/posix/log.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),1)
|
||||
srcs += os/posix/posix-vidmem.c
|
||||
endif
|
||||
endif
|
||||
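# Editor's note (hedged): this file mixes two conditional styles. "ifdef
# CONFIG_X" is true whenever CONFIG_X has any non-empty value, while
# "ifeq ($(CONFIG_X),1)" only matches the exact value 1 that this build system
# assigns. Minimal illustration with a made-up variable, not part of this file:
#   CONFIG_FOO := 0
#   ifdef CONFIG_FOO           # taken: the value 0 is non-empty
#   endif
#   ifeq ($(CONFIG_FOO),1)     # not taken: 0 is not 1
#   endif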
|
||||
# POSIX sources shared between the POSIX and QNX builds.
|
||||
srcs += os/posix/bug.c \
|
||||
os/posix/rwsem.c \
|
||||
os/posix/timers.c \
|
||||
os/posix/cond.c \
|
||||
os/posix/lock.c \
|
||||
os/posix/thread.c \
|
||||
os/posix/os_sched.c \
|
||||
os/posix/bitmap.c \
|
||||
os/posix/kmem.c \
|
||||
os/posix/file_ops.c \
|
||||
os/posix/queue.c
|
||||
|
||||
ifeq ($(NV_BUILD_CONFIGURATION_IS_SAFETY),0)
|
||||
srcs += os/posix/bsearch.c
|
||||
endif
|
||||
|
||||
srcs += common/device.c \
|
||||
common/utils/enabled.c \
|
||||
common/utils/errata.c \
|
||||
common/utils/rbtree.c \
|
||||
common/utils/string.c \
|
||||
common/utils/worker.c \
|
||||
common/swdebug/profile.c \
|
||||
common/init/nvgpu_init.c \
|
||||
common/mm/allocators/nvgpu_allocator.c \
|
||||
common/mm/allocators/bitmap_allocator.c \
|
||||
common/mm/allocators/buddy_allocator.c \
|
||||
common/mm/gmmu/page_table.c \
|
||||
common/mm/gmmu/pd_cache.c \
|
||||
common/mm/gmmu/pte.c \
|
||||
common/mm/as.c \
|
||||
common/mm/vm.c \
|
||||
common/mm/vm_area.c \
|
||||
common/mm/nvgpu_mem.c \
|
||||
common/mm/nvgpu_sgt.c \
|
||||
common/mm/mm.c \
|
||||
common/mm/dma.c \
|
||||
common/therm/therm.c \
|
||||
common/ltc/ltc.c \
|
||||
common/fb/fb.c \
|
||||
common/fbp/fbp.c \
|
||||
common/io/io.c \
|
||||
common/ecc.c \
|
||||
common/falcon/falcon.c \
|
||||
common/falcon/falcon_sw_gk20a.c \
|
||||
common/gr/gr.c \
|
||||
common/gr/gr_utils.c \
|
||||
common/gr/gr_intr.c \
|
||||
common/gr/global_ctx.c \
|
||||
common/gr/subctx.c \
|
||||
common/gr/ctx.c \
|
||||
common/gr/gr_falcon.c \
|
||||
common/gr/gr_config.c \
|
||||
common/gr/gr_setup.c \
|
||||
common/gr/obj_ctx.c \
|
||||
common/gr/fs_state.c \
|
||||
common/gr/gr_ecc.c \
|
||||
common/netlist/netlist.c \
|
||||
common/pmu/pmu.c \
|
||||
common/acr/acr.c \
|
||||
common/acr/acr_wpr.c \
|
||||
common/acr/acr_blob_alloc.c \
|
||||
common/acr/acr_blob_construct.c \
|
||||
common/acr/acr_bootstrap.c \
|
||||
common/acr/acr_sw_gv11b.c \
|
||||
common/ptimer/ptimer.c \
|
||||
common/power_features/cg/cg.c \
|
||||
common/sync/channel_user_syncpt.c \
|
||||
common/fifo/preempt.c \
|
||||
common/fifo/channel.c \
|
||||
common/fifo/fifo.c \
|
||||
common/fifo/pbdma.c \
|
||||
common/fifo/tsg.c \
|
||||
common/fifo/runlist.c \
|
||||
common/fifo/engine_status.c \
|
||||
common/fifo/engines.c \
|
||||
common/fifo/pbdma_status.c \
|
||||
common/mc/mc.c \
|
||||
common/rc/rc.c \
|
||||
common/ce/ce.c \
|
||||
common/grmgr/grmgr.c \
|
||||
common/cic/cic.c \
|
||||
common/cic/cic_intr.c \
|
||||
common/cic/ce_cic.c \
|
||||
common/cic/ctxsw_cic.c \
|
||||
common/cic/ecc_cic.c \
|
||||
common/cic/host_cic.c \
|
||||
common/cic/gr_cic.c \
|
||||
common/cic/pri_cic.c \
|
||||
common/cic/pmu_cic.c \
|
||||
common/cic/mmu_cic.c \
|
||||
common/cic/msg_cic.c \
|
||||
hal/init/hal_gv11b.c \
|
||||
hal/init/hal_gv11b_litter.c \
|
||||
hal/init/hal_init.c \
|
||||
hal/power_features/cg/gv11b_gating_reglist.c \
|
||||
hal/fifo/runlist_fifo_gv11b.c \
|
||||
hal/fifo/userd_gk20a.c \
|
||||
hal/sync/syncpt_cmdbuf_gv11b.c
|
||||
|
||||
# Source files below are functionally safe (FuSa) and must always be included.
|
||||
srcs += hal/mm/mm_gv11b_fusa.c \
|
||||
hal/mm/mm_gp10b_fusa.c \
|
||||
hal/mm/gmmu/gmmu_gv11b_fusa.c \
|
||||
hal/mm/gmmu/gmmu_gp10b_fusa.c \
|
||||
hal/mm/gmmu/gmmu_gk20a_fusa.c \
|
||||
hal/mm/gmmu/gmmu_gm20b_fusa.c \
|
||||
hal/mm/cache/flush_gk20a_fusa.c \
|
||||
hal/mm/cache/flush_gv11b_fusa.c \
|
||||
hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c \
|
||||
hal/ltc/intr/ltc_intr_gp10b_fusa.c \
|
||||
hal/ltc/intr/ltc_intr_gv11b_fusa.c \
|
||||
hal/bus/bus_gk20a_fusa.c \
|
||||
hal/bus/bus_gm20b_fusa.c \
|
||||
hal/bus/bus_gp10b_fusa.c \
|
||||
hal/bus/bus_gv11b_fusa.c \
|
||||
hal/ce/ce_gp10b_fusa.c \
|
||||
hal/ce/ce_gv11b_fusa.c \
|
||||
hal/class/class_gv11b_fusa.c \
|
||||
hal/falcon/falcon_gk20a_fusa.c \
|
||||
hal/fb/fb_gm20b_fusa.c \
|
||||
hal/fb/fb_gv11b_fusa.c \
|
||||
hal/fb/fb_mmu_fault_gv11b_fusa.c \
|
||||
hal/fb/ecc/fb_ecc_gv11b_fusa.c \
|
||||
hal/fb/intr/fb_intr_ecc_gv11b_fusa.c \
|
||||
hal/fb/intr/fb_intr_gv11b_fusa.c \
|
||||
hal/fifo/channel_gk20a_fusa.c \
|
||||
hal/fifo/channel_gm20b_fusa.c \
|
||||
hal/fifo/channel_gv11b_fusa.c \
|
||||
hal/fifo/ctxsw_timeout_gv11b_fusa.c \
|
||||
hal/fifo/engine_status_gm20b_fusa.c \
|
||||
hal/fifo/engine_status_gv100_fusa.c \
|
||||
hal/fifo/engines_gp10b_fusa.c \
|
||||
hal/fifo/engines_gv11b_fusa.c \
|
||||
hal/fifo/fifo_gk20a_fusa.c \
|
||||
hal/fifo/fifo_gv11b_fusa.c \
|
||||
hal/fifo/fifo_intr_gk20a_fusa.c \
|
||||
hal/fifo/fifo_intr_gv11b_fusa.c \
|
||||
hal/fifo/pbdma_gm20b_fusa.c \
|
||||
hal/fifo/pbdma_gp10b_fusa.c \
|
||||
hal/fifo/pbdma_gv11b_fusa.c \
|
||||
hal/fifo/pbdma_status_gm20b_fusa.c \
|
||||
hal/fifo/preempt_gv11b_fusa.c \
|
||||
hal/fifo/ramfc_gp10b_fusa.c \
|
||||
hal/fifo/ramfc_gv11b_fusa.c \
|
||||
hal/fifo/ramin_gk20a_fusa.c \
|
||||
hal/fifo/ramin_gm20b_fusa.c \
|
||||
hal/fifo/ramin_gv11b_fusa.c \
|
||||
hal/fifo/runlist_fifo_gk20a_fusa.c \
|
||||
hal/fifo/runlist_fifo_gv11b_fusa.c \
|
||||
hal/fifo/runlist_ram_gv11b_fusa.c \
|
||||
hal/fifo/tsg_gk20a_fusa.c \
|
||||
hal/fifo/tsg_gv11b_fusa.c \
|
||||
hal/fifo/usermode_gv11b_fusa.c \
|
||||
hal/fuse/fuse_gm20b_fusa.c \
|
||||
hal/fuse/fuse_gp10b_fusa.c \
|
||||
hal/gr/config/gr_config_gm20b_fusa.c \
|
||||
hal/gr/config/gr_config_gv100_fusa.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gp10b_fusa.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gv11b_fusa.c \
|
||||
hal/gr/ecc/ecc_gv11b_fusa.c \
|
||||
hal/gr/falcon/gr_falcon_gm20b_fusa.c \
|
||||
hal/gr/falcon/gr_falcon_gp10b_fusa.c \
|
||||
hal/gr/falcon/gr_falcon_gv11b_fusa.c \
|
||||
hal/gr/init/gr_init_gm20b_fusa.c \
|
||||
hal/gr/init/gr_init_gp10b_fusa.c \
|
||||
hal/gr/init/gr_init_gv11b_fusa.c \
|
||||
hal/gr/intr/gr_intr_gm20b_fusa.c \
|
||||
hal/gr/intr/gr_intr_gp10b_fusa.c \
|
||||
hal/gr/intr/gr_intr_gv11b_fusa.c \
|
||||
hal/ltc/ltc_gm20b_fusa.c \
|
||||
hal/ltc/ltc_gp10b_fusa.c \
|
||||
hal/ltc/ltc_gv11b_fusa.c \
|
||||
hal/mc/mc_gm20b_fusa.c \
|
||||
hal/mc/mc_gp10b_fusa.c \
|
||||
hal/mc/mc_gv11b_fusa.c \
|
||||
hal/netlist/netlist_gv11b_fusa.c \
|
||||
hal/pmu/pmu_gk20a_fusa.c \
|
||||
hal/pmu/pmu_gv11b_fusa.c \
|
||||
hal/priv_ring/priv_ring_gm20b_fusa.c \
|
||||
hal/priv_ring/priv_ring_gp10b_fusa.c \
|
||||
hal/ptimer/ptimer_gk20a_fusa.c \
|
||||
hal/sync/syncpt_cmdbuf_gv11b_fusa.c \
|
||||
hal/therm/therm_gv11b_fusa.c \
|
||||
hal/top/top_gm20b_fusa.c \
|
||||
hal/top/top_gv11b_fusa.c \
|
||||
hal/cic/cic_gv11b_fusa.c \
|
||||
hal/cic/cic_lut_gv11b_fusa.c
|
||||
|
||||
# Source files below are not guaranteed to be functionally safe (FuSa) and are
|
||||
# only included in the normal build.
|
||||
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1)
|
||||
srcs += hal/init/hal_gp10b.c \
|
||||
hal/init/hal_gp10b_litter.c \
|
||||
hal/init/hal_gm20b.c \
|
||||
hal/init/hal_gm20b_litter.c \
|
||||
hal/mm/cache/flush_gk20a.c \
|
||||
hal/mm/mm_gm20b.c \
|
||||
hal/mm/mm_gk20a.c \
|
||||
hal/mm/gmmu/gmmu_gk20a.c \
|
||||
hal/mm/gmmu/gmmu_gm20b.c \
|
||||
hal/mc/mc_gm20b.c \
|
||||
hal/bus/bus_gk20a.c \
|
||||
hal/class/class_gm20b.c \
|
||||
hal/class/class_gp10b.c \
|
||||
hal/clk/clk_gm20b.c \
|
||||
hal/falcon/falcon_gk20a.c \
|
||||
hal/gr/config/gr_config_gm20b.c \
|
||||
hal/gr/ecc/ecc_gp10b.c \
|
||||
hal/gr/ecc/ecc_gv11b.c \
|
||||
hal/gr/init/gr_init_gm20b.c \
|
||||
hal/gr/init/gr_init_gp10b.c \
|
||||
hal/gr/init/gr_init_gv11b.c \
|
||||
hal/gr/intr/gr_intr_gm20b.c \
|
||||
hal/gr/intr/gr_intr_gp10b.c \
|
||||
hal/gr/falcon/gr_falcon_gm20b.c \
|
||||
hal/priv_ring/priv_ring_gm20b.c \
|
||||
hal/power_features/cg/gm20b_gating_reglist.c \
|
||||
hal/power_features/cg/gp10b_gating_reglist.c \
|
||||
hal/ce/ce2_gk20a.c \
|
||||
hal/therm/therm_gm20b.c \
|
||||
hal/therm/therm_gp10b.c \
|
||||
hal/ltc/ltc_gm20b.c \
|
||||
hal/ltc/ltc_gp10b.c \
|
||||
hal/ltc/intr/ltc_intr_gm20b.c \
|
||||
hal/ltc/intr/ltc_intr_gp10b.c \
|
||||
hal/fb/fb_gp10b.c \
|
||||
hal/fb/fb_gp106.c \
|
||||
hal/fb/fb_gm20b.c \
|
||||
hal/fb/fb_gv11b.c \
|
||||
hal/fb/intr/fb_intr_ecc_gv11b.c \
|
||||
hal/fuse/fuse_gm20b.c \
|
||||
hal/fifo/fifo_gk20a.c \
|
||||
hal/fifo/preempt_gk20a.c \
|
||||
hal/fifo/engines_gm20b.c \
|
||||
hal/fifo/pbdma_gm20b.c \
|
||||
hal/fifo/pbdma_gp10b.c \
|
||||
hal/fifo/engine_status_gm20b.c \
|
||||
hal/fifo/ramfc_gk20a.c \
|
||||
hal/fifo/ramfc_gp10b.c \
|
||||
hal/fifo/ramin_gk20a.c \
|
||||
hal/fifo/ramin_gp10b.c \
|
||||
hal/fifo/channel_gk20a.c \
|
||||
hal/fifo/channel_gm20b.c \
|
||||
hal/fifo/tsg_gk20a.c \
|
||||
hal/fifo/fifo_intr_gk20a.c \
|
||||
hal/fifo/mmu_fault_gk20a.c \
|
||||
hal/fifo/mmu_fault_gm20b.c \
|
||||
hal/fifo/mmu_fault_gp10b.c \
|
||||
hal/fifo/ctxsw_timeout_gk20a.c \
|
||||
hal/fifo/runlist_fifo_gk20a.c \
|
||||
hal/fifo/runlist_ram_gk20a.c \
|
||||
hal/netlist/netlist_gm20b.c \
|
||||
hal/netlist/netlist_gp10b.c \
|
||||
hal/sync/syncpt_cmdbuf_gk20a.c \
|
||||
hal/pmu/pmu_gv11b.c \
|
||||
hal/top/top_gm20b.c \
|
||||
hal/top/top_gp106.c \
|
||||
hal/top/top_gp10b.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c
|
||||
else
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),1)
|
||||
# Non-FuSa files needed to build dGPU support in the safety build
|
||||
srcs += hal/gr/falcon/gr_falcon_gm20b.c \
|
||||
hal/fuse/fuse_gm20b.c \
|
||||
hal/fb/fb_gp106.c \
|
||||
hal/falcon/falcon_gk20a.c \
|
||||
hal/bus/bus_gk20a.c \
|
||||
hal/pmu/pmu_gv11b.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_CLK_ARB),1)
|
||||
srcs += \
|
||||
common/clk_arb/clk_arb.c \
|
||||
common/clk_arb/clk_arb_gp10b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_ACR_LEGACY),1)
|
||||
srcs += \
|
||||
common/acr/acr_blob_construct_v0.c \
|
||||
common/acr/acr_sw_gm20b.c \
|
||||
common/acr/acr_sw_gp10b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_ENGINE_QUEUE),1)
|
||||
srcs += common/engine_queues/engine_mem_queue.c \
|
||||
common/engine_queues/engine_dmem_queue.c \
|
||||
common/engine_queues/engine_emem_queue.c \
|
||||
common/engine_queues/engine_fb_queue.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_GRAPHICS),1)
|
||||
srcs += common/gr/zbc.c \
|
||||
common/gr/zcull.c \
|
||||
hal/gr/zbc/zbc_gm20b.c \
|
||||
hal/gr/zbc/zbc_gp10b.c \
|
||||
hal/gr/zbc/zbc_gv11b.c \
|
||||
hal/gr/zcull/zcull_gm20b.c \
|
||||
hal/gr/zcull/zcull_gv11b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DEBUGGER),1)
|
||||
srcs += common/debugger.c \
|
||||
common/regops/regops.c \
|
||||
common/gr/hwpm_map.c \
|
||||
common/perf/perfbuf.c \
|
||||
hal/regops/regops_gv11b.c \
|
||||
hal/regops/allowlist_gv11b.c \
|
||||
hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c \
|
||||
hal/gr/hwpm_map/hwpm_map_gv100.c \
|
||||
hal/ltc/ltc_gm20b_dbg.c \
|
||||
hal/ptimer/ptimer_gp10b.c \
|
||||
hal/perf/perf_gv11b.c \
|
||||
hal/perf/perf_tu104.c \
|
||||
hal/gr/gr/gr_gk20a.c \
|
||||
hal/gr/gr/gr_gm20b.c \
|
||||
hal/gr/gr/gr_gp10b.c \
|
||||
hal/gr/gr/gr_gv11b.c \
|
||||
hal/gr/gr/gr_gv100.c \
|
||||
hal/gr/gr/gr_tu104.c
|
||||
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1)
|
||||
srcs += hal/regops/regops_gm20b.c \
|
||||
hal/regops/regops_gp10b.c \
|
||||
hal/regops/regops_tu104.c \
|
||||
hal/regops/allowlist_tu104.c \
|
||||
hal/perf/perf_gm20b.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_PROFILER),1)
|
||||
srcs += common/profiler/profiler.c \
|
||||
common/profiler/pm_reservation.c \
|
||||
hal/priv_ring/priv_ring_gv11b.c \
|
||||
hal/ptimer/ptimer_gv11b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1)
|
||||
srcs += common/fifo/submit.c \
|
||||
common/fifo/priv_cmdbuf.c \
|
||||
common/fifo/job.c \
|
||||
common/fifo/channel_worker.c \
|
||||
common/sync/channel_sync.c \
|
||||
common/sync/channel_sync_syncpt.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_CHANNEL_WDT),1)
|
||||
srcs += common/fifo/watchdog.c \
|
||||
common/fifo/channel_wdt.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_SW_SEMAPHORE),1)
|
||||
srcs += common/semaphore/semaphore_sea.c \
|
||||
common/semaphore/semaphore_pool.c \
|
||||
common/semaphore/semaphore_hw.c \
|
||||
common/semaphore/semaphore.c \
|
||||
common/sync/channel_sync_semaphore.c \
|
||||
hal/sync/sema_cmdbuf_gk20a.c \
|
||||
hal/sync/sema_cmdbuf_gv11b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_USERD),1)
|
||||
srcs += common/fifo/userd.c \
|
||||
hal/fifo/userd_gv11b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_RECOVERY),1)
|
||||
srcs += hal/rc/rc_gv11b.c
|
||||
ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1)
|
||||
srcs += hal/rc/rc_gk20a.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_FENCE),1)
|
||||
srcs += common/fence/fence.c
|
||||
ifeq ($(CONFIG_TEGRA_GK20A_NVHOST),1)
|
||||
srcs += common/fence/fence_syncpt.c
|
||||
endif
|
||||
ifeq ($(CONFIG_NVGPU_SW_SEMAPHORE),1)
|
||||
srcs += common/fence/fence_sema.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_FECS_TRACE),1)
|
||||
srcs += common/gr/fecs_trace.c \
|
||||
hal/gr/fecs_trace/fecs_trace_gm20b.c \
|
||||
hal/gr/fecs_trace/fecs_trace_gv11b.c
|
||||
ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1)
|
||||
srcs += common/vgpu/gr/fecs_trace_vgpu.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_CYCLESTATS),1)
|
||||
srcs += common/perf/cyclestats_snapshot.c \
|
||||
common/cyclestats/cyclestats.c
|
||||
ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1)
|
||||
srcs += common/vgpu/perf/cyclestats_snapshot_vgpu.c
|
||||
endif
|
||||
endif
|
||||
|
||||
# POSIX file used for unit testing on both QNX and Linux
|
||||
ifdef NVGPU_FAULT_INJECTION_ENABLEMENT
|
||||
srcs += os/posix/posix-fault-injection.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_LS_PMU),1)
|
||||
# Add LS PMU files which are required for the normal build
|
||||
srcs += \
|
||||
common/pmu/boardobj/boardobj.c \
|
||||
common/pmu/boardobj/boardobjgrp.c \
|
||||
common/pmu/boardobj/boardobjgrpmask.c \
|
||||
common/pmu/boardobj/boardobjgrp_e255.c \
|
||||
common/pmu/boardobj/boardobjgrp_e32.c \
|
||||
common/pmu/clk/clk.c \
|
||||
common/pmu/volt/volt.c \
|
||||
common/pmu/clk/clk_domain.c \
|
||||
common/pmu/clk/clk_fll.c \
|
||||
common/pmu/clk/clk_prog.c \
|
||||
common/pmu/clk/clk_vf_point.c \
|
||||
common/pmu/clk/clk_vin.c \
|
||||
common/pmu/fw/fw.c \
|
||||
common/pmu/fw/fw_ver_ops.c \
|
||||
common/pmu/fw/fw_ns_bootstrap.c \
|
||||
common/pmu/ipc/pmu_cmd.c \
|
||||
common/pmu/ipc/pmu_msg.c \
|
||||
common/pmu/ipc/pmu_queue.c \
|
||||
common/pmu/ipc/pmu_seq.c \
|
||||
common/pmu/lpwr/rppg.c \
|
||||
common/pmu/lsfm/lsfm.c \
|
||||
common/pmu/lsfm/lsfm_sw_gm20b.c \
|
||||
common/pmu/lsfm/lsfm_sw_gp10b.c \
|
||||
common/pmu/lsfm/lsfm_sw_gv100.c \
|
||||
common/pmu/lsfm/lsfm_sw_tu104.c \
|
||||
common/pmu/perf/vfe_equ.c \
|
||||
common/pmu/perf/vfe_var.c \
|
||||
common/pmu/perf/perf.c \
|
||||
common/pmu/perf/pstate.c \
|
||||
common/pmu/perf/change_seq.c \
|
||||
common/pmu/perfmon/pmu_perfmon.c \
|
||||
common/pmu/perfmon/pmu_perfmon_sw_gm20b.c \
|
||||
common/pmu/perfmon/pmu_perfmon_sw_gv11b.c \
|
||||
common/pmu/pmgr/pmgr.c \
|
||||
common/pmu/pmgr/pmgrpmu.c \
|
||||
common/pmu/pmgr/pwrdev.c \
|
||||
common/pmu/pmgr/pwrmonitor.c \
|
||||
common/pmu/pmgr/pwrpolicy.c \
|
||||
common/pmu/super_surface/super_surface.c \
|
||||
common/pmu/therm/thrm.c \
|
||||
common/pmu/therm/therm_channel.c \
|
||||
common/pmu/therm/therm_dev.c \
|
||||
common/pmu/volt/volt_dev.c \
|
||||
common/pmu/volt/volt_policy.c \
|
||||
common/pmu/volt/volt_rail.c \
|
||||
common/pmu/allocator.c \
|
||||
common/pmu/pmu_debug.c \
|
||||
common/pmu/pmu_mutex.c \
|
||||
common/pmu/pmu_pstate.c \
|
||||
common/pmu/pmu_rtos_init.c \
|
||||
hal/therm/therm_tu104.c \
|
||||
hal/pmu/pmu_gk20a.c \
|
||||
hal/pmu/pmu_gm20b.c \
|
||||
hal/pmu/pmu_gp10b.c \
|
||||
hal/pmu/pmu_tu104.c
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_POWER_PG),1)
|
||||
srcs += common/pmu/pg/pg_sw_gm20b.c \
|
||||
common/pmu/pg/pg_sw_gp10b.c \
|
||||
common/pmu/pg/pg_sw_gp106.c \
|
||||
common/pmu/pg/pg_sw_gv11b.c \
|
||||
common/pmu/pg/pmu_pg.c \
|
||||
common/pmu/pg/pmu_aelpg.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_CLK_ARB),1)
|
||||
srcs += common/clk_arb/clk_arb_gv100.c
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_POWER_PG),1)
|
||||
srcs += common/power_features/pg/pg.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_IGPU_VIRT),1)
|
||||
srcs += common/vgpu/init/init_vgpu.c \
|
||||
common/vgpu/ivc/comm_vgpu.c \
|
||||
common/vgpu/intr/intr_vgpu.c \
|
||||
common/vgpu/ptimer/ptimer_vgpu.c \
|
||||
common/vgpu/top/top_vgpu.c \
|
||||
common/vgpu/fifo/fifo_vgpu.c \
|
||||
common/vgpu/fifo/channel_vgpu.c \
|
||||
common/vgpu/fifo/tsg_vgpu.c \
|
||||
common/vgpu/fifo/preempt_vgpu.c \
|
||||
common/vgpu/fifo/runlist_vgpu.c \
|
||||
common/vgpu/fifo/ramfc_vgpu.c \
|
||||
common/vgpu/perf/perf_vgpu.c \
|
||||
common/vgpu/profiler/profiler_vgpu.c \
|
||||
common/vgpu/mm/mm_vgpu.c \
|
||||
common/vgpu/mm/vm_vgpu.c \
|
||||
common/vgpu/gr/gr_vgpu.c \
|
||||
common/vgpu/fb/fb_vgpu.c \
|
||||
common/vgpu/gr/ctx_vgpu.c \
|
||||
common/vgpu/gr/subctx_vgpu.c \
|
||||
common/vgpu/clk_vgpu.c \
|
||||
common/vgpu/debugger_vgpu.c \
|
||||
common/vgpu/pm_reservation_vgpu.c \
|
||||
common/vgpu/ltc/ltc_vgpu.c \
|
||||
common/vgpu/fbp/fbp_vgpu.c \
|
||||
common/vgpu/ce_vgpu.c \
|
||||
hal/vgpu/init/init_hal_vgpu.c \
|
||||
hal/vgpu/init/vgpu_hal_gv11b.c \
|
||||
hal/vgpu/fifo/fifo_gv11b_vgpu.c \
|
||||
hal/vgpu/sync/syncpt_cmdbuf_gv11b_vgpu.c
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_USERD),1)
|
||||
srcs += common/vgpu/fifo/userd_vgpu.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),1)
|
||||
srcs += common/vgpu/cbc/cbc_vgpu.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),1)
|
||||
srcs += common/mm/comptags.c \
|
||||
common/cbc/cbc.c \
|
||||
hal/cbc/cbc_gm20b.c \
|
||||
hal/cbc/cbc_gp10b.c \
|
||||
hal/cbc/cbc_gv11b.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_NVLINK),1)
|
||||
srcs += common/vbios/nvlink_bios.c \
|
||||
common/nvlink/probe.c \
|
||||
common/nvlink/init/device_reginit.c \
|
||||
common/nvlink/init/device_reginit_gv100.c \
|
||||
common/nvlink/minion.c \
|
||||
common/nvlink/link_mode_transitions.c \
|
||||
common/nvlink/nvlink.c \
|
||||
hal/nvlink/minion_gv100.c \
|
||||
hal/nvlink/minion_tu104.c \
|
||||
hal/nvlink/nvlink_gv100.c \
|
||||
hal/nvlink/nvlink_tu104.c \
|
||||
hal/nvlink/intr_and_err_handling_tu104.c \
|
||||
hal/nvlink/link_mode_transitions_gv100.c \
|
||||
hal/nvlink/link_mode_transitions_tu104.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_DGPU),1)
|
||||
srcs += common/sec2/sec2.c \
|
||||
common/sec2/sec2_allocator.c \
|
||||
common/sec2/sec2_lsfm.c \
|
||||
common/sec2/ipc/sec2_cmd.c \
|
||||
common/sec2/ipc/sec2_msg.c \
|
||||
common/sec2/ipc/sec2_queue.c \
|
||||
common/sec2/ipc/sec2_seq.c \
|
||||
common/vbios/bios.c \
|
||||
common/vbios/bios_sw_gv100.c \
|
||||
common/vbios/bios_sw_tu104.c \
|
||||
common/falcon/falcon_sw_tu104.c \
|
||||
common/acr/acr_sw_tu104.c \
|
||||
common/mm/allocators/page_allocator.c \
|
||||
common/mm/vidmem.c \
|
||||
common/pramin.c \
|
||||
common/ce/ce_app.c \
|
||||
common/sbr/sbr.c \
|
||||
hal/mm/mm_gv100.c \
|
||||
hal/mm/mm_tu104.c \
|
||||
hal/mc/mc_gv100.c \
|
||||
hal/mc/mc_tu104.c \
|
||||
hal/bus/bus_gv100.c \
|
||||
hal/bus/bus_tu104.c \
|
||||
hal/ce/ce_tu104.c \
|
||||
hal/class/class_tu104.c \
|
||||
hal/clk/clk_tu104.c \
|
||||
hal/clk/clk_mon_tu104.c \
|
||||
hal/gr/init/gr_init_gv100.c \
|
||||
hal/gr/init/gr_init_tu104.c \
|
||||
hal/gr/intr/gr_intr_tu104.c \
|
||||
hal/gr/falcon/gr_falcon_tu104.c \
|
||||
hal/fbpa/fbpa_tu104.c \
|
||||
hal/init/hal_tu104.c \
|
||||
hal/init/hal_tu104_litter.c \
|
||||
hal/power_features/cg/tu104_gating_reglist.c \
|
||||
hal/ltc/ltc_tu104.c \
|
||||
hal/fb/fb_gv100.c \
|
||||
hal/fb/fb_tu104.c \
|
||||
hal/fb/fb_mmu_fault_tu104.c \
|
||||
hal/fb/intr/fb_intr_gv100.c \
|
||||
hal/fb/intr/fb_intr_tu104.c \
|
||||
hal/func/func_tu104.c \
|
||||
hal/fifo/fifo_tu104.c \
|
||||
hal/fifo/usermode_tu104.c \
|
||||
hal/fifo/pbdma_tu104.c \
|
||||
hal/fifo/ramfc_tu104.c \
|
||||
hal/fifo/ramin_tu104.c \
|
||||
hal/fifo/channel_gv100.c \
|
||||
hal/fifo/runlist_ram_tu104.c \
|
||||
hal/fifo/runlist_fifo_gv100.c \
|
||||
hal/fifo/runlist_fifo_tu104.c \
|
||||
hal/fifo/fifo_intr_gv100.c \
|
||||
hal/fuse/fuse_gp106.c \
|
||||
hal/fuse/fuse_tu104.c \
|
||||
hal/netlist/netlist_gv100.c \
|
||||
hal/netlist/netlist_tu104.c \
|
||||
hal/nvdec/nvdec_gp106.c \
|
||||
hal/nvdec/nvdec_tu104.c \
|
||||
hal/gsp/gsp_tu104.c \
|
||||
hal/sec2/sec2_tu104.c \
|
||||
hal/pramin/pramin_gp10b.c \
|
||||
hal/pramin/pramin_gv100.c \
|
||||
hal/pramin/pramin_init.c \
|
||||
hal/pramin/pramin_tu104.c \
|
||||
hal/bios/bios_tu104.c \
|
||||
hal/top/top_gv100.c \
|
||||
hal/xve/xve_gp106.c \
|
||||
hal/xve/xve_tu104.c
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_COMPRESSION),1)
|
||||
srcs += hal/cbc/cbc_tu104.c
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_SIM),1)
|
||||
srcs += common/sim/sim.c \
|
||||
common/sim/sim_pci.c \
|
||||
common/sim/sim_netlist.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_NON_FUSA),1)
|
||||
srcs += common/power_features/power_features.c
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_NVGPU_TPC_POWERGATE),1)
|
||||
srcs += hal/tpc/tpc_gv11b.c
|
||||
endif
|
||||
172
drivers/gpu/nvgpu/common/acr/acr.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/acr.h>
|
||||
|
||||
#include "acr_priv.h"
|
||||
#ifdef CONFIG_NVGPU_ACR_LEGACY
|
||||
#include "acr_sw_gm20b.h"
|
||||
#include "acr_sw_gp10b.h"
|
||||
#endif
|
||||
#include "acr_sw_gv11b.h"
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
#include "acr_sw_tu104.h"
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA)
|
||||
#include "nvgpu_next_gpuid.h"
|
||||
#endif
|
||||
|
||||
/* ACR public APIs */
|
||||
bool nvgpu_acr_is_lsf_lazy_bootstrap(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
u32 falcon_id)
|
||||
{
|
||||
if (acr == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((falcon_id == FALCON_ID_FECS) || (falcon_id == FALCON_ID_PMU) ||
|
||||
(falcon_id == FALCON_ID_GPCCS)) {
|
||||
return acr->lsf[falcon_id].is_lazy_bootstrap;
|
||||
} else {
|
||||
nvgpu_err(g, "Invalid falcon id\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
int nvgpu_acr_alloc_blob_prerequisite(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
size_t size)
|
||||
{
|
||||
if (acr == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return acr->alloc_blob_space(g, size, &acr->ucode_blob);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ACR blob construct & bootstrap */
|
||||
int nvgpu_acr_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (acr == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = acr->bootstrap_hs_acr(g, acr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR bootstrap failed");
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "ACR bootstrap Done");
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_acr_construct_execute(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (g->acr == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = g->acr->prepare_ucode_blob(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR ucode blob prepare failed");
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Bootstrap HS ACR failed");
|
||||
}
|
||||
|
||||
done:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* ACR init */
|
||||
int nvgpu_acr_init(struct gk20a *g)
|
||||
{
|
||||
u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch,
|
||||
g->params.gpu_impl);
|
||||
int err = 0;
|
||||
|
||||
if (g->acr != NULL) {
|
||||
/*
|
||||
* Recovery/unrailgate case, we do not need to do ACR init as ACR is
|
||||
* set during cold boot & doesn't execute ACR clean up as part of
|
||||
* the sequence, so reuse it to perform a faster boot.
|
||||
*/
|
||||
return err;
|
||||
}
|
||||
|
||||
g->acr = (struct nvgpu_acr *)nvgpu_kzalloc(g, sizeof(struct nvgpu_acr));
|
||||
if (g->acr == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (ver) {
|
||||
#ifdef CONFIG_NVGPU_ACR_LEGACY
|
||||
case GK20A_GPUID_GM20B:
|
||||
case GK20A_GPUID_GM20B_B:
|
||||
nvgpu_gm20b_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
case NVGPU_GPUID_GP10B:
|
||||
nvgpu_gp10b_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
#endif
|
||||
case NVGPU_GPUID_GV11B:
|
||||
nvgpu_gv11b_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
case NVGPU_NEXT_GPUID:
|
||||
nvgpu_next_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
#endif
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
case NVGPU_GPUID_TU104:
|
||||
nvgpu_tu104_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
case NVGPU_NEXT_DGPU_GPUID:
|
||||
nvgpu_next_dgpu_acr_sw_init(g, g->acr);
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
default:
|
||||
nvgpu_kfree(g, g->acr);
|
||||
err = -EINVAL;
|
||||
nvgpu_err(g, "no support for GPUID %x", ver);
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
return err;
|
||||
}
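/*
 * Illustration only (not part of the original file): a minimal sketch of
 * how the public entry points above are typically sequenced at boot.
 * The helper name and its call site are hypothetical; the two calls and
 * their semantics are taken from the functions defined above.
 */
static int example_acr_boot(struct gk20a *g)
{
	int err;

	/* Allocate g->acr and hook up the chip-specific ACR sw ops. */
	err = nvgpu_acr_init(g);
	if (err != 0) {
		return err;
	}

	/* Build the LS ucode blob and bootstrap the HS ACR ucode. */
	return nvgpu_acr_construct_execute(g);
}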
|
||||
63
drivers/gpu/nvgpu/common/acr/acr_blob_alloc.c
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
|
||||
#include "acr_wpr.h"
|
||||
#include "acr_priv.h"
|
||||
#include "acr_blob_alloc.h"
|
||||
|
||||
int nvgpu_acr_alloc_blob_space_sys(struct gk20a *g, size_t size,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_PHYSICALLY_ADDRESSED,
|
||||
size, mem);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
int nvgpu_acr_alloc_blob_space_vid(struct gk20a *g, size_t size,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
struct wpr_carveout_info wpr_inf;
|
||||
int err;
|
||||
|
||||
if (mem->size != 0ULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
g->acr->get_wpr_info(g, &wpr_inf);
|
||||
|
||||
/*
|
||||
* Even though this mem_desc wouldn't be used, the wpr region needs to
|
||||
* be reserved in the allocator.
|
||||
*/
|
||||
err = nvgpu_dma_alloc_vid_at(g, wpr_inf.size,
|
||||
&g->acr->wpr_dummy, wpr_inf.wpr_base);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
return nvgpu_dma_alloc_vid_at(g, wpr_inf.size, mem,
|
||||
wpr_inf.nonwpr_base);
|
||||
}
|
||||
#endif
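/*
 * Illustration only: a minimal sketch of how a chip-specific ACR sw_init
 * might select one of the allocators above. The surrounding helper is
 * hypothetical; alloc_blob_space itself is the hook that
 * nvgpu_acr_alloc_blob_prerequisite() in acr.c invokes.
 */
static void example_acr_select_blob_allocator(struct nvgpu_acr *acr)
{
#ifdef CONFIG_NVGPU_DGPU
	/* dGPU: place the blob in vidmem next to the WPR carveout. */
	acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_vid;
#else
	/* iGPU: a physically addressed sysmem buffer is sufficient. */
	acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys;
#endif
}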
|
||||
36
drivers/gpu/nvgpu/common/acr/acr_blob_alloc.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_BLOB_ALLOC_H
|
||||
#define ACR_BLOB_ALLOC_H
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_mem;
|
||||
|
||||
int nvgpu_acr_alloc_blob_space_sys(struct gk20a *g, size_t size,
|
||||
struct nvgpu_mem *mem);
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
int nvgpu_acr_alloc_blob_space_vid(struct gk20a *g, size_t size,
|
||||
struct nvgpu_mem *mem);
|
||||
#endif
|
||||
|
||||
#endif /* ACR_BLOB_ALLOC_H */
|
||||
1159
drivers/gpu/nvgpu/common/acr/acr_blob_construct.c
Normal file
File diff suppressed because it is too large
153
drivers/gpu/nvgpu/common/acr/acr_blob_construct.h
Normal file
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_BLOB_CONSTRUCT_H
|
||||
#define ACR_BLOB_CONSTRUCT_H
|
||||
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/flcnif_cmn.h>
|
||||
#include <nvgpu/pmu.h>
|
||||
|
||||
#include "nvgpu_acr_interface.h"
|
||||
|
||||
#define UCODE_NB_MAX_DATE_LENGTH 64U
|
||||
struct ls_falcon_ucode_desc {
|
||||
u32 descriptor_size;
|
||||
u32 image_size;
|
||||
u32 tools_version;
|
||||
u32 app_version;
|
||||
char date[UCODE_NB_MAX_DATE_LENGTH];
|
||||
u32 bootloader_start_offset;
|
||||
u32 bootloader_size;
|
||||
u32 bootloader_imem_offset;
|
||||
u32 bootloader_entry_point;
|
||||
u32 app_start_offset;
|
||||
u32 app_size;
|
||||
u32 app_imem_offset;
|
||||
u32 app_imem_entry;
|
||||
u32 app_dmem_offset;
|
||||
u32 app_resident_code_offset;
|
||||
u32 app_resident_code_size;
|
||||
u32 app_resident_data_offset;
|
||||
u32 app_resident_data_size;
|
||||
u32 nb_imem_overlays;
|
||||
u32 nb_dmem_overlays;
|
||||
struct {u32 start; u32 size; } load_ovl[UCODE_NB_MAX_DATE_LENGTH];
|
||||
u32 compressed;
|
||||
};
|
||||
|
||||
struct ls_falcon_ucode_desc_v1 {
|
||||
u32 descriptor_size;
|
||||
u32 image_size;
|
||||
u32 tools_version;
|
||||
u32 app_version;
|
||||
char date[UCODE_NB_MAX_DATE_LENGTH];
|
||||
u32 secure_bootloader;
|
||||
u32 bootloader_start_offset;
|
||||
u32 bootloader_size;
|
||||
u32 bootloader_imem_offset;
|
||||
u32 bootloader_entry_point;
|
||||
u32 app_start_offset;
|
||||
u32 app_size;
|
||||
u32 app_imem_offset;
|
||||
u32 app_imem_entry;
|
||||
u32 app_dmem_offset;
|
||||
u32 app_resident_code_offset;
|
||||
u32 app_resident_code_size;
|
||||
u32 app_resident_data_offset;
|
||||
u32 app_resident_data_size;
|
||||
u32 nb_imem_overlays;
|
||||
u32 nb_dmem_overlays;
|
||||
struct {u32 start; u32 size; } load_ovl[64];
|
||||
u32 compressed;
|
||||
};
|
||||
|
||||
struct flcn_ucode_img {
|
||||
u32 *data;
|
||||
struct ls_falcon_ucode_desc *desc;
|
||||
u32 data_size;
|
||||
struct lsf_ucode_desc *lsf_desc;
|
||||
bool is_next_core_img;
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
struct falcon_next_core_ucode_desc *ndesc;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct lsfm_managed_ucode_img {
|
||||
struct lsfm_managed_ucode_img *next;
|
||||
struct lsf_wpr_header wpr_header;
|
||||
struct lsf_lsb_header lsb_header;
|
||||
struct flcn_bl_dmem_desc bl_gen_desc;
|
||||
u32 bl_gen_desc_size;
|
||||
u32 full_ucode_size;
|
||||
struct flcn_ucode_img ucode_img;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
/*
|
||||
* LSF shared SubWpr Header
|
||||
*
|
||||
* use_case_id - Shared SubWpr use case ID (updated by nvgpu)
|
||||
* start_addr - start address of subWpr (updated by nvgpu)
|
||||
* size_4K - size of subWpr in 4K (updated by nvgpu)
|
||||
*/
|
||||
struct lsf_shared_sub_wpr_header {
|
||||
u32 use_case_id;
|
||||
u32 start_addr;
|
||||
u32 size_4K;
|
||||
};
|
||||
|
||||
/*
|
||||
* LSFM SUB WPRs struct
|
||||
* pnext : Next entry in the list, NULL if last
|
||||
* sub_wpr_header : SubWpr Header struct
|
||||
*/
|
||||
struct lsfm_sub_wpr {
|
||||
struct lsfm_sub_wpr *pnext;
|
||||
struct lsf_shared_sub_wpr_header sub_wpr_header;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct ls_flcn_mgr {
|
||||
u16 managed_flcn_cnt;
|
||||
u32 wpr_size;
|
||||
struct lsfm_managed_ucode_img *ucode_img_list;
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
u16 managed_sub_wpr_count;
|
||||
struct lsfm_sub_wpr *psub_wpr_list;
|
||||
#endif
|
||||
};
|
||||
|
||||
int nvgpu_acr_prepare_ucode_blob(struct gk20a *g);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img);
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img);
|
||||
#endif
|
||||
#endif
|
||||
int nvgpu_acr_lsf_fecs_ucode_details(struct gk20a *g, void *lsf_ucode_img);
|
||||
int nvgpu_acr_lsf_gpccs_ucode_details(struct gk20a *g, void *lsf_ucode_img);
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
int nvgpu_acr_lsf_sec2_ucode_details(struct gk20a *g, void *lsf_ucode_img);
|
||||
#endif
|
||||
|
||||
#endif /* ACR_BLOB_CONSTRUCT_H */
|
||||
801
drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
Normal file
@@ -0,0 +1,801 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/pmu.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/gr/gr_falcon.h>
|
||||
#include <nvgpu/pmu/fw.h>
|
||||
#include <nvgpu/gr/gr_utils.h>
|
||||
|
||||
#include "acr_blob_construct_v0.h"
|
||||
#include "acr_wpr.h"
|
||||
#include "acr_priv.h"
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
int nvgpu_acr_lsf_pmu_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
|
||||
{
|
||||
struct lsf_ucode_desc_v0 *lsf_desc;
|
||||
struct nvgpu_firmware *fw_sig;
|
||||
struct nvgpu_firmware *fw_desc;
|
||||
struct nvgpu_firmware *fw_image;
|
||||
struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img;
|
||||
int err = 0;
|
||||
|
||||
lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0));
|
||||
if (lsf_desc == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
fw_sig = nvgpu_pmu_fw_sig_desc(g, g->pmu);
|
||||
fw_desc = nvgpu_pmu_fw_desc_desc(g, g->pmu);
|
||||
fw_image = nvgpu_pmu_fw_image_desc(g, g->pmu);
|
||||
|
||||
nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fw_sig->data,
|
||||
min_t(size_t, sizeof(*lsf_desc), fw_sig->size));
|
||||
|
||||
lsf_desc->falcon_id = FALCON_ID_PMU;
|
||||
|
||||
p_img->desc = (struct pmu_ucode_desc *)(void *)fw_desc->data;
|
||||
p_img->data = (u32 *)(void *)fw_image->data;
|
||||
p_img->data_size = p_img->desc->image_size;
|
||||
p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc;
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
|
||||
{
|
||||
struct lsf_ucode_desc_v0 *lsf_desc;
|
||||
struct nvgpu_firmware *fecs_sig;
|
||||
struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img;
|
||||
struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g);
|
||||
struct nvgpu_ctxsw_ucode_segments *fecs =
|
||||
nvgpu_gr_falcon_get_fecs_ucode_segments(gr_falcon);
|
||||
int err;
|
||||
|
||||
fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0);
|
||||
if (fecs_sig == NULL) {
|
||||
nvgpu_err(g, "failed to load fecs sig");
|
||||
return -ENOENT;
|
||||
}
|
||||
lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0));
|
||||
if (lsf_desc == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto rel_sig;
|
||||
}
|
||||
nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fecs_sig->data,
|
||||
min_t(size_t, sizeof(*lsf_desc), fecs_sig->size));
|
||||
|
||||
lsf_desc->falcon_id = FALCON_ID_FECS;
|
||||
|
||||
p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
|
||||
if (p_img->desc == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto free_lsf_desc;
|
||||
}
|
||||
|
||||
p_img->desc->bootloader_start_offset = fecs->boot.offset;
|
||||
p_img->desc->bootloader_size = NVGPU_ALIGN(fecs->boot.size, 256U);
|
||||
p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset;
|
||||
p_img->desc->bootloader_entry_point = fecs->boot_entry;
|
||||
|
||||
p_img->desc->image_size = NVGPU_ALIGN(fecs->boot.size, 256U) +
|
||||
NVGPU_ALIGN(fecs->code.size, 256U) + NVGPU_ALIGN(fecs->data.size, 256U);
|
||||
p_img->desc->app_size = NVGPU_ALIGN(fecs->code.size, 256U) +
|
||||
NVGPU_ALIGN(fecs->data.size, 256U);
|
||||
p_img->desc->app_start_offset = fecs->code.offset;
|
||||
p_img->desc->app_imem_offset = 0;
|
||||
p_img->desc->app_imem_entry = 0;
|
||||
p_img->desc->app_dmem_offset = 0;
|
||||
p_img->desc->app_resident_code_offset = 0;
|
||||
p_img->desc->app_resident_code_size = fecs->code.size;
|
||||
p_img->desc->app_resident_data_offset =
|
||||
fecs->data.offset - fecs->code.offset;
|
||||
p_img->desc->app_resident_data_size = fecs->data.size;
|
||||
p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon);
|
||||
p_img->data_size = p_img->desc->image_size;
|
||||
|
||||
p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc;
|
||||
nvgpu_acr_dbg(g, "fecs fw loaded\n");
|
||||
nvgpu_release_firmware(g, fecs_sig);
|
||||
return 0;
|
||||
free_lsf_desc:
|
||||
nvgpu_kfree(g, lsf_desc);
|
||||
rel_sig:
|
||||
nvgpu_release_firmware(g, fecs_sig);
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
|
||||
{
|
||||
struct lsf_ucode_desc_v0 *lsf_desc;
|
||||
struct nvgpu_firmware *gpccs_sig;
|
||||
struct flcn_ucode_img_v0 *p_img = (struct flcn_ucode_img_v0 *)lsf_ucode_img;
|
||||
struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g);
|
||||
struct nvgpu_ctxsw_ucode_segments *gpccs =
|
||||
nvgpu_gr_falcon_get_gpccs_ucode_segments(gr_falcon);
|
||||
int err;
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
gpccs_sig = nvgpu_request_firmware(g, T18x_GPCCS_UCODE_SIG, 0);
|
||||
if (gpccs_sig == NULL) {
|
||||
nvgpu_err(g, "failed to load gpccs sig");
|
||||
return -ENOENT;
|
||||
}
|
||||
lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc_v0));
|
||||
if (lsf_desc == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto rel_sig;
|
||||
}
|
||||
nvgpu_memcpy((u8 *)lsf_desc, (u8 *)gpccs_sig->data,
|
||||
min_t(size_t, sizeof(*lsf_desc), gpccs_sig->size));
|
||||
lsf_desc->falcon_id = FALCON_ID_GPCCS;
|
||||
|
||||
p_img->desc = nvgpu_kzalloc(g, sizeof(struct pmu_ucode_desc));
|
||||
if (p_img->desc == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto free_lsf_desc;
|
||||
}
|
||||
|
||||
p_img->desc->bootloader_start_offset = 0;
|
||||
p_img->desc->bootloader_size = NVGPU_ALIGN(gpccs->boot.size, 256U);
|
||||
p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset;
|
||||
p_img->desc->bootloader_entry_point = gpccs->boot_entry;
|
||||
|
||||
p_img->desc->image_size = NVGPU_ALIGN(gpccs->boot.size, 256U) +
|
||||
NVGPU_ALIGN(gpccs->code.size, 256U) +
|
||||
NVGPU_ALIGN(gpccs->data.size, 256U);
|
||||
p_img->desc->app_size = NVGPU_ALIGN(gpccs->code.size, 256U) +
|
||||
NVGPU_ALIGN(gpccs->data.size, 256U);
|
||||
p_img->desc->app_start_offset = p_img->desc->bootloader_size;
|
||||
p_img->desc->app_imem_offset = 0;
|
||||
p_img->desc->app_imem_entry = 0;
|
||||
p_img->desc->app_dmem_offset = 0;
|
||||
p_img->desc->app_resident_code_offset = 0;
|
||||
p_img->desc->app_resident_code_size = NVGPU_ALIGN(gpccs->code.size, 256U);
|
||||
p_img->desc->app_resident_data_offset =
|
||||
NVGPU_ALIGN(gpccs->data.offset, 256U) -
|
||||
NVGPU_ALIGN(gpccs->code.offset, 256U);
|
||||
p_img->desc->app_resident_data_size = NVGPU_ALIGN(gpccs->data.size, 256U);
|
||||
p_img->data = (u32 *)
|
||||
((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(gr_falcon) +
|
||||
gpccs->boot.offset);
|
||||
p_img->data_size = NVGPU_ALIGN(p_img->desc->image_size, 256U);
|
||||
p_img->lsf_desc = (struct lsf_ucode_desc_v0 *)lsf_desc;
|
||||
nvgpu_acr_dbg(g, "gpccs fw loaded\n");
|
||||
nvgpu_release_firmware(g, gpccs_sig);
|
||||
return 0;
|
||||
free_lsf_desc:
|
||||
nvgpu_kfree(g, lsf_desc);
|
||||
rel_sig:
|
||||
nvgpu_release_firmware(g, gpccs_sig);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief lsfm_fill_static_lsb_hdr_info
|
||||
* Populate static LSB header information using the provided ucode image
|
||||
*/
|
||||
static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
|
||||
u32 falcon_id, struct lsfm_managed_ucode_img_v0 *pnode)
|
||||
{
|
||||
u32 full_app_size = 0;
|
||||
u32 data = 0;
|
||||
|
||||
if (pnode->ucode_img.lsf_desc != NULL) {
|
||||
nvgpu_memcpy((u8 *)&pnode->lsb_header.signature,
|
||||
(u8 *)pnode->ucode_img.lsf_desc,
|
||||
sizeof(struct lsf_ucode_desc_v0));
|
||||
}
|
||||
pnode->lsb_header.ucode_size = pnode->ucode_img.data_size;
|
||||
|
||||
/* Uses a loader, i.e. it has a desc */
|
||||
pnode->lsb_header.data_size = 0;
|
||||
|
||||
/*
|
||||
* The loader code size is already aligned (padded) such that
|
||||
* the code following it is aligned, but the size in the image
|
||||
* desc is not; bloat it up to a 256-byte alignment.
|
||||
*/
|
||||
pnode->lsb_header.bl_code_size = NVGPU_ALIGN(
|
||||
pnode->ucode_img.desc->bootloader_size,
|
||||
LSF_BL_CODE_SIZE_ALIGNMENT);
|
||||
full_app_size = NVGPU_ALIGN(pnode->ucode_img.desc->app_size,
|
||||
LSF_BL_CODE_SIZE_ALIGNMENT) +
|
||||
pnode->lsb_header.bl_code_size;
|
||||
pnode->lsb_header.ucode_size = NVGPU_ALIGN(
|
||||
pnode->ucode_img.desc->app_resident_data_offset,
|
||||
LSF_BL_CODE_SIZE_ALIGNMENT) +
|
||||
pnode->lsb_header.bl_code_size;
|
||||
pnode->lsb_header.data_size = full_app_size -
|
||||
pnode->lsb_header.ucode_size;
|
||||
/*
|
||||
* Though the BL is located at 0th offset of the image, the VA
|
||||
* is different to make sure that it doesn't collide with the actual
|
||||
* OS VA range
|
||||
*/
|
||||
pnode->lsb_header.bl_imem_off =
|
||||
pnode->ucode_img.desc->bootloader_imem_offset;
|
||||
|
||||
pnode->lsb_header.flags = 0;
|
||||
|
||||
if (falcon_id == FALCON_ID_PMU) {
|
||||
data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE;
|
||||
pnode->lsb_header.flags = data;
|
||||
}
|
||||
|
||||
if (g->acr->lsf[falcon_id].is_priv_load) {
|
||||
pnode->lsb_header.flags |=
|
||||
NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Adds a ucode image to the list of managed ucode images. */
|
||||
static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm,
|
||||
struct flcn_ucode_img_v0 *ucode_image, u32 falcon_id)
|
||||
{
|
||||
|
||||
struct lsfm_managed_ucode_img_v0 *pnode;
|
||||
|
||||
pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img_v0));
|
||||
if (pnode == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Keep a copy of the ucode image info locally */
|
||||
nvgpu_memcpy((u8 *)&pnode->ucode_img, (u8 *)ucode_image,
|
||||
sizeof(struct flcn_ucode_img_v0));
|
||||
|
||||
/* Fill in static WPR header info*/
|
||||
pnode->wpr_header.falcon_id = falcon_id;
|
||||
pnode->wpr_header.bootstrap_owner = g->acr->bootstrap_owner;
|
||||
pnode->wpr_header.status = LSF_IMAGE_STATUS_COPY;
|
||||
|
||||
pnode->wpr_header.lazy_bootstrap =
|
||||
(u32)g->acr->lsf[falcon_id].is_lazy_bootstrap;
|
||||
|
||||
/* Fill in static LSB header info elsewhere */
|
||||
lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode);
|
||||
pnode->next = plsfm->ucode_img_list;
|
||||
plsfm->ucode_img_list = pnode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Discover all managed falcon ucode images */
|
||||
static int lsfm_discover_ucode_images(struct gk20a *g,
|
||||
struct ls_flcn_mgr_v0 *plsfm)
|
||||
{
|
||||
struct flcn_ucode_img_v0 ucode_img;
|
||||
struct nvgpu_acr *acr = g->acr;
|
||||
u32 falcon_id;
|
||||
u32 i;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* Enumerate all constructed falcon objects, as we need the ucode
|
||||
* image info and total falcon count
|
||||
*/
|
||||
for (i = 0U; i < FALCON_ID_END; i++) {
|
||||
if (nvgpu_test_bit(i, (void *)&acr->lsf_enable_mask) &&
|
||||
acr->lsf[i].get_lsf_ucode_details != NULL) {
|
||||
|
||||
(void) memset(&ucode_img, 0, sizeof(ucode_img));
|
||||
|
||||
if (acr->lsf[i].get_lsf_ucode_details(g,
|
||||
(void *)&ucode_img) != 0) {
|
||||
nvgpu_err(g, "LS falcon-%d ucode get failed", i);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (ucode_img.lsf_desc != NULL) {
|
||||
/*
|
||||
* falcon_id is formed by grabbing the static
|
||||
* base falcon_id from the image and adding the
|
||||
* engine-designated falcon instance.
|
||||
*/
|
||||
falcon_id = ucode_img.lsf_desc->falcon_id;
|
||||
|
||||
err = lsfm_add_ucode_img(g, plsfm, &ucode_img,
|
||||
falcon_id);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, " Failed to add falcon-%d to LSFM ",
|
||||
falcon_id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
plsfm->managed_flcn_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
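/*
 * Illustration only: a minimal sketch of the registration that the
 * discovery loop above relies on, typically done from a chip's ACR
 * sw_init. The helper name and the exact width of lsf_enable_mask are
 * assumptions; the lsf[] fields and the FECS details callback are the
 * ones used in this file.
 */
static void example_register_fecs_lsf(struct nvgpu_acr *acr)
{
	acr->lsf[FALCON_ID_FECS].get_lsf_ucode_details =
					nvgpu_acr_lsf_fecs_ucode_details_v0;
	acr->lsf[FALCON_ID_FECS].is_lazy_bootstrap = false;
	acr->lsf[FALCON_ID_FECS].is_priv_load = false;

	/* Mark the falcon as managed so the discovery loop visits it. */
	acr->lsf_enable_mask |= 1U << FALCON_ID_FECS;
}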
|
||||
|
||||
/* Generate WPR requirements for ACR allocation request */
|
||||
static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm)
|
||||
{
|
||||
struct lsfm_managed_ucode_img_v0 *pnode = plsfm->ucode_img_list;
|
||||
u32 wpr_offset;
|
||||
|
||||
/*
|
||||
* Start with an array of WPR headers at the base of the WPR.
|
||||
* The expectation here is that the secure falcon will do a single DMA
|
||||
* read of this array and cache it internally so it's OK to pack these.
|
||||
* Also, we add 1 to the falcon count to indicate the end of the array.
|
||||
*/
|
||||
wpr_offset = U32(sizeof(struct lsf_wpr_header_v0)) *
|
||||
(U32(plsfm->managed_flcn_cnt) + U32(1));
|
||||
|
||||
/*
|
||||
* Walk the managed falcons, accounting for the LSB structs
|
||||
* as well as the ucode images.
|
||||
*/
|
||||
while (pnode != NULL) {
|
||||
/* Align, save off, and include an LSB header size */
|
||||
wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_LSB_HEADER_ALIGNMENT);
|
||||
pnode->wpr_header.lsb_offset = wpr_offset;
|
||||
wpr_offset += (u32)sizeof(struct lsf_lsb_header_v0);
|
||||
|
||||
/*
|
||||
* Align, save off, and include the original (static)
|
||||
* ucode image size
|
||||
*/
|
||||
wpr_offset = NVGPU_ALIGN(wpr_offset,
|
||||
LSF_UCODE_DATA_ALIGNMENT);
|
||||
pnode->lsb_header.ucode_off = wpr_offset;
|
||||
wpr_offset += pnode->ucode_img.data_size;
|
||||
|
||||
/*
|
||||
* For falcons that use a boot loader (BL), we append a loader
|
||||
* desc structure on the end of the ucode image and consider this
|
||||
* the boot loader data. The host will then copy the loader desc
|
||||
* args to this space within the WPR region (before locking down)
|
||||
* and the HS bin will then copy them to DMEM 0 for the loader.
|
||||
*/
|
||||
/*
|
||||
* Track the size for LSB details filled in later
|
||||
* Note that at this point we don't know what kind of
|
||||
* boot loader desc it is, so we just take the size of the
|
||||
* generic one, which is the largest it will ever be.
|
||||
*/
|
||||
/* Align (size bloat) and save off generic descriptor size */
|
||||
pnode->lsb_header.bl_data_size = NVGPU_ALIGN(
|
||||
(u32)sizeof(pnode->bl_gen_desc),
|
||||
LSF_BL_DATA_SIZE_ALIGNMENT);
|
||||
|
||||
/* Align, save off, and include the additional BL data */
|
||||
wpr_offset = NVGPU_ALIGN(wpr_offset,
|
||||
LSF_BL_DATA_ALIGNMENT);
|
||||
pnode->lsb_header.bl_data_off = wpr_offset;
|
||||
wpr_offset += pnode->lsb_header.bl_data_size;
|
||||
|
||||
/* Finally, update ucode surface size to include updates */
|
||||
pnode->full_ucode_size = wpr_offset -
|
||||
pnode->lsb_header.ucode_off;
|
||||
if (pnode->wpr_header.falcon_id != FALCON_ID_PMU) {
|
||||
pnode->lsb_header.app_code_off =
|
||||
pnode->lsb_header.bl_code_size;
|
||||
pnode->lsb_header.app_code_size =
|
||||
pnode->lsb_header.ucode_size -
|
||||
pnode->lsb_header.bl_code_size;
|
||||
pnode->lsb_header.app_data_off =
|
||||
pnode->lsb_header.ucode_size;
|
||||
pnode->lsb_header.app_data_size =
|
||||
pnode->lsb_header.data_size;
|
||||
}
|
||||
pnode = pnode->next;
|
||||
}
|
||||
plsfm->wpr_size = wpr_offset;
|
||||
return 0;
|
||||
}
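/*
 * Illustration only: where the first LSB header lands for a blob that
 * manages "cnt" falcons, following the same layout rule as
 * lsf_gen_wpr_requirements() above (a packed WPR header array, including
 * the terminator entry, then the aligned LSB headers). The helper name
 * is hypothetical.
 */
static u32 example_first_lsb_offset(u32 cnt)
{
	u32 off = (u32)sizeof(struct lsf_wpr_header_v0) * (cnt + 1U);

	return NVGPU_ALIGN(off, LSF_LSB_HEADER_ALIGNMENT);
}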
|
||||
|
||||
/* Initialize WPR contents */
|
||||
static int gm20b_pmu_populate_loader_cfg(struct gk20a *g,
|
||||
void *lsfm, u32 *p_bl_gen_desc_size)
|
||||
{
|
||||
struct wpr_carveout_info wpr_inf;
|
||||
struct lsfm_managed_ucode_img_v0 *p_lsfm =
|
||||
(struct lsfm_managed_ucode_img_v0 *)lsfm;
|
||||
struct flcn_ucode_img_v0 *p_img = &(p_lsfm->ucode_img);
|
||||
struct loader_config *ldr_cfg = &(p_lsfm->bl_gen_desc.loader_cfg);
|
||||
u64 addr_base;
|
||||
struct pmu_ucode_desc *desc;
|
||||
u64 tmp;
|
||||
u32 addr_code, addr_data;
|
||||
|
||||
if (p_img->desc == NULL) {
|
||||
/*
|
||||
* This means it's a header-based ucode,
|
||||
* and so we do not fill BL gen desc structure
|
||||
*/
|
||||
return -EINVAL;
|
||||
}
|
||||
desc = p_img->desc;
|
||||
/*
|
||||
* Calculate physical and virtual addresses for various portions of
|
||||
* the PMU ucode image
|
||||
* Calculate the 32-bit addresses for the application code, application
|
||||
* data, and bootloader code. These values are all based on IM_BASE.
|
||||
* The 32-bit addresses will be the upper 32-bits of the virtual or
|
||||
* physical addresses of each respective segment.
|
||||
*/
|
||||
addr_base = p_lsfm->lsb_header.ucode_off;
|
||||
g->acr->get_wpr_info(g, &wpr_inf);
|
||||
addr_base += wpr_inf.wpr_base;
|
||||
nvgpu_acr_dbg(g, "pmu loader cfg u32 addrbase %x\n", (u32)addr_base);
|
||||
/*From linux*/
|
||||
tmp = (addr_base +
|
||||
desc->app_start_offset +
|
||||
desc->app_resident_code_offset) >> 8;
|
||||
nvgpu_assert(tmp <= U32_MAX);
|
||||
addr_code = u64_lo32(tmp);
|
||||
nvgpu_acr_dbg(g, "app start %d app res code off %d\n",
|
||||
desc->app_start_offset, desc->app_resident_code_offset);
|
||||
tmp = (addr_base +
|
||||
desc->app_start_offset +
|
||||
desc->app_resident_data_offset) >> 8;
|
||||
nvgpu_assert(tmp <= U32_MAX);
|
||||
addr_data = u64_lo32(tmp);
|
||||
nvgpu_acr_dbg(g, "app res data offset%d\n",
|
||||
desc->app_resident_data_offset);
|
||||
nvgpu_acr_dbg(g, "bl start off %d\n", desc->bootloader_start_offset);
|
||||
|
||||
/* Populate the loader_config state*/
|
||||
ldr_cfg->dma_idx = g->acr->lsf[FALCON_ID_PMU].falcon_dma_idx;
|
||||
ldr_cfg->code_dma_base = addr_code;
|
||||
ldr_cfg->code_dma_base1 = 0x0;
|
||||
ldr_cfg->code_size_total = desc->app_size;
|
||||
ldr_cfg->code_size_to_load = desc->app_resident_code_size;
|
||||
ldr_cfg->code_entry_point = desc->app_imem_entry;
|
||||
ldr_cfg->data_dma_base = addr_data;
|
||||
ldr_cfg->data_dma_base1 = 0;
|
||||
ldr_cfg->data_size = desc->app_resident_data_size;
|
||||
ldr_cfg->overlay_dma_base = addr_code;
|
||||
ldr_cfg->overlay_dma_base1 = 0x0;
|
||||
|
||||
/* Update the argc/argv members*/
|
||||
ldr_cfg->argc = 1;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
nvgpu_pmu_fw_get_cmd_line_args_offset(g, &ldr_cfg->argv);
|
||||
#endif
|
||||
*p_bl_gen_desc_size = (u32)sizeof(struct loader_config);
|
||||
return 0;
|
||||
}
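/*
 * Illustration only: the ">> 8" in the function above packs a byte
 * address into the 256-byte units expected by the falcon DMA base
 * fields. A standalone version of the same conversion (helper name is
 * hypothetical):
 */
static u32 example_dma_base_256(u64 wpr_base, u32 app_start, u32 resident_off)
{
	u64 byte_addr = wpr_base + (u64)app_start + (u64)resident_off;

	nvgpu_assert((byte_addr >> 8) <= U32_MAX);
	return u64_lo32(byte_addr >> 8);
}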
|
||||
|
||||
static int gm20b_flcn_populate_bl_dmem_desc(struct gk20a *g,
|
||||
void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid)
|
||||
{
|
||||
struct wpr_carveout_info wpr_inf;
|
||||
struct lsfm_managed_ucode_img_v0 *p_lsfm =
|
||||
(struct lsfm_managed_ucode_img_v0 *)lsfm;
|
||||
struct flcn_ucode_img_v0 *p_img = &(p_lsfm->ucode_img);
|
||||
struct flcn_bl_dmem_desc_v0 *ldr_cfg =
|
||||
&(p_lsfm->bl_gen_desc.bl_dmem_desc);
|
||||
u64 addr_base;
|
||||
struct pmu_ucode_desc *desc;
|
||||
u32 addr_code, addr_data;
|
||||
u64 tmp;
|
||||
|
||||
if (p_img->desc == NULL) {
|
||||
/*
|
||||
* This means it's a header-based ucode,
|
||||
* and so we do not fill BL gen desc structure
|
||||
*/
|
||||
return -EINVAL;
|
||||
}
|
||||
desc = p_img->desc;
|
||||
|
||||
/*
|
||||
* Calculate physical and virtual addresses for various portions of
|
||||
* the PMU ucode image
|
||||
* Calculate the 32-bit addresses for the application code, application
|
||||
* data, and bootloader code. These values are all based on IM_BASE.
|
||||
* The 32-bit addresses will be the upper 32-bits of the virtual or
|
||||
* physical addresses of each respective segment.
|
||||
*/
|
||||
addr_base = p_lsfm->lsb_header.ucode_off;
|
||||
g->acr->get_wpr_info(g, &wpr_inf);
|
||||
addr_base += wpr_inf.wpr_base;
|
||||
|
||||
nvgpu_acr_dbg(g, "gen loader cfg %x u32 addrbase %x ID\n", (u32)addr_base,
|
||||
p_lsfm->wpr_header.falcon_id);
|
||||
tmp = (addr_base +
|
||||
desc->app_start_offset +
|
||||
desc->app_resident_code_offset) >> 8;
|
||||
nvgpu_assert(tmp <= U32_MAX);
|
||||
addr_code = u64_lo32(tmp);
|
||||
tmp = (addr_base +
|
||||
desc->app_start_offset +
|
||||
desc->app_resident_data_offset) >> 8;
|
||||
nvgpu_assert(tmp <= U32_MAX);
|
||||
addr_data = u64_lo32(tmp);
|
||||
|
||||
nvgpu_acr_dbg(g, "gen cfg %x u32 addrcode %x & data %x load offset %xID\n",
|
||||
(u32)addr_code, (u32)addr_data, desc->bootloader_start_offset,
|
||||
p_lsfm->wpr_header.falcon_id);
|
||||
|
||||
/* Populate the LOADER_CONFIG state */
|
||||
(void) memset((void *) ldr_cfg, 0, sizeof(struct flcn_bl_dmem_desc_v0));
|
||||
ldr_cfg->ctx_dma = g->acr->lsf[falconid].falcon_dma_idx;
|
||||
ldr_cfg->code_dma_base = addr_code;
|
||||
ldr_cfg->non_sec_code_size = desc->app_resident_code_size;
|
||||
ldr_cfg->data_dma_base = addr_data;
|
||||
ldr_cfg->data_size = desc->app_resident_data_size;
|
||||
ldr_cfg->code_entry_point = desc->app_imem_entry;
|
||||
*p_bl_gen_desc_size = (u32)sizeof(struct flcn_bl_dmem_desc_v0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Populate falcon boot loader generic desc.*/
|
||||
static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g,
|
||||
struct lsfm_managed_ucode_img_v0 *pnode)
|
||||
{
|
||||
int err = -ENOENT;
|
||||
|
||||
if (pnode->wpr_header.falcon_id != FALCON_ID_PMU) {
|
||||
nvgpu_acr_dbg(g, "non pmu. write flcn bl gen desc\n");
|
||||
err = gm20b_flcn_populate_bl_dmem_desc(g,
|
||||
pnode, &pnode->bl_gen_desc_size,
|
||||
pnode->wpr_header.falcon_id);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn_populate_bl_dmem_desc failed=%d",
|
||||
err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
if (pnode->wpr_header.falcon_id == FALCON_ID_PMU) {
|
||||
nvgpu_acr_dbg(g, "pmu write flcn bl gen desc\n");
|
||||
err = gm20b_pmu_populate_loader_cfg(g, pnode,
|
||||
&pnode->bl_gen_desc_size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "pmu_populate_loader_cfg failed=%d",
|
||||
err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Failed to find the falcon requested. */
|
||||
return err;
|
||||
}
|
||||
|
||||
static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm,
|
||||
struct nvgpu_mem *ucode)
|
||||
{
|
||||
struct lsfm_managed_ucode_img_v0 *pnode = plsfm->ucode_img_list;
|
||||
struct lsf_wpr_header_v0 last_wpr_hdr;
|
||||
u32 i;
|
||||
int err = 0;
|
||||
|
||||
/* The WPR array is at the base of the WPR */
|
||||
pnode = plsfm->ucode_img_list;
|
||||
(void) memset(&last_wpr_hdr, 0, sizeof(struct lsf_wpr_header_v0));
|
||||
i = 0;
|
||||
|
||||
/*
|
||||
* Walk the managed falcons, flush WPR and LSB headers to FB.
|
||||
* Flush any BL args to the storage area relative to the
|
||||
* ucode image (appended on the end as a DMEM area).
|
||||
*/
|
||||
while (pnode != NULL) {
|
||||
/* Flush WPR header to memory*/
|
||||
nvgpu_mem_wr_n(g, ucode, i * (u32)sizeof(pnode->wpr_header),
|
||||
&pnode->wpr_header,
|
||||
(u32)sizeof(pnode->wpr_header));
|
||||
|
||||
nvgpu_acr_dbg(g, "wpr header");
|
||||
nvgpu_acr_dbg(g, "falconid :%d",
|
||||
pnode->wpr_header.falcon_id);
|
||||
nvgpu_acr_dbg(g, "lsb_offset :%x",
|
||||
pnode->wpr_header.lsb_offset);
|
||||
nvgpu_acr_dbg(g, "bootstrap_owner :%d",
|
||||
pnode->wpr_header.bootstrap_owner);
|
||||
nvgpu_acr_dbg(g, "lazy_bootstrap :%d",
|
||||
pnode->wpr_header.lazy_bootstrap);
|
||||
nvgpu_acr_dbg(g, "status :%d",
|
||||
pnode->wpr_header.status);
|
||||
|
||||
/*Flush LSB header to memory*/
|
||||
nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset,
|
||||
&pnode->lsb_header,
|
||||
(u32)sizeof(pnode->lsb_header));
|
||||
|
||||
nvgpu_acr_dbg(g, "lsb header");
|
||||
nvgpu_acr_dbg(g, "ucode_off :%x",
|
||||
pnode->lsb_header.ucode_off);
|
||||
nvgpu_acr_dbg(g, "ucode_size :%x",
|
||||
pnode->lsb_header.ucode_size);
|
||||
nvgpu_acr_dbg(g, "data_size :%x",
|
||||
pnode->lsb_header.data_size);
|
||||
nvgpu_acr_dbg(g, "bl_code_size :%x",
|
||||
pnode->lsb_header.bl_code_size);
|
||||
nvgpu_acr_dbg(g, "bl_imem_off :%x",
|
||||
pnode->lsb_header.bl_imem_off);
|
||||
nvgpu_acr_dbg(g, "bl_data_off :%x",
|
||||
pnode->lsb_header.bl_data_off);
|
||||
nvgpu_acr_dbg(g, "bl_data_size :%x",
|
||||
pnode->lsb_header.bl_data_size);
|
||||
nvgpu_acr_dbg(g, "app_code_off :%x",
|
||||
pnode->lsb_header.app_code_off);
|
||||
nvgpu_acr_dbg(g, "app_code_size :%x",
|
||||
pnode->lsb_header.app_code_size);
|
||||
nvgpu_acr_dbg(g, "app_data_off :%x",
|
||||
pnode->lsb_header.app_data_off);
|
||||
nvgpu_acr_dbg(g, "app_data_size :%x",
|
||||
pnode->lsb_header.app_data_size);
|
||||
nvgpu_acr_dbg(g, "flags :%x",
|
||||
pnode->lsb_header.flags);
|
||||
|
||||
/* this falcon has a boot loader and related args, flush them */
|
||||
/* Populate gen bl and flush to memory */
|
||||
err = lsfm_fill_flcn_bl_gen_desc(g, pnode);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "bl_gen_desc failed err=%d", err);
|
||||
return err;
|
||||
}
|
||||
nvgpu_mem_wr_n(g, ucode,
|
||||
pnode->lsb_header.bl_data_off,
|
||||
&pnode->bl_gen_desc,
|
||||
pnode->bl_gen_desc_size);
|
||||
|
||||
/* Copying of ucode */
|
||||
nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off,
|
||||
pnode->ucode_img.data,
|
||||
pnode->ucode_img.data_size);
|
||||
pnode = pnode->next;
|
||||
i++;
|
||||
}
|
||||
|
||||
/* Tag the terminator WPR header with an invalid falcon ID. */
|
||||
last_wpr_hdr.falcon_id = FALCON_ID_INVALID;
|
||||
nvgpu_mem_wr_n(g, ucode,
|
||||
(u32)plsfm->managed_flcn_cnt *
|
||||
(u32)sizeof(struct lsf_wpr_header_v0),
|
||||
&last_wpr_hdr,
|
||||
(u32)sizeof(struct lsf_wpr_header_v0));
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Free any ucode image structure resources. */
|
||||
static void lsfm_free_ucode_img_res(struct gk20a *g,
|
||||
struct flcn_ucode_img_v0 *p_img)
|
||||
{
|
||||
if (p_img->lsf_desc != NULL) {
|
||||
nvgpu_kfree(g, p_img->lsf_desc);
|
||||
p_img->lsf_desc = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free any ucode image structure resources. */
|
||||
static void lsfm_free_nonpmu_ucode_img_res(struct gk20a *g,
|
||||
struct flcn_ucode_img_v0 *p_img)
|
||||
{
|
||||
if (p_img->lsf_desc != NULL) {
|
||||
nvgpu_kfree(g, p_img->lsf_desc);
|
||||
p_img->lsf_desc = NULL;
|
||||
}
|
||||
if (p_img->desc != NULL) {
|
||||
nvgpu_kfree(g, p_img->desc);
|
||||
p_img->desc = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr_v0 *plsfm)
|
||||
{
|
||||
u32 cnt = plsfm->managed_flcn_cnt;
|
||||
struct lsfm_managed_ucode_img_v0 *mg_ucode_img;
|
||||
while (cnt != 0U) {
|
||||
mg_ucode_img = plsfm->ucode_img_list;
|
||||
if (mg_ucode_img->ucode_img.lsf_desc->falcon_id ==
|
||||
FALCON_ID_PMU) {
|
||||
lsfm_free_ucode_img_res(g, &mg_ucode_img->ucode_img);
|
||||
} else {
|
||||
lsfm_free_nonpmu_ucode_img_res(g,
|
||||
&mg_ucode_img->ucode_img);
|
||||
}
|
||||
plsfm->ucode_img_list = mg_ucode_img->next;
|
||||
nvgpu_kfree(g, mg_ucode_img);
|
||||
cnt--;
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
struct ls_flcn_mgr_v0 lsfm_l, *plsfm;
|
||||
struct wpr_carveout_info wpr_inf;
|
||||
struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g);
|
||||
|
||||
if (g->acr->ucode_blob.cpu_va != NULL) {
|
||||
/* Recovery case, we do not need to form non WPR blob */
|
||||
return err;
|
||||
}
|
||||
plsfm = &lsfm_l;
|
||||
(void) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v0));
|
||||
nvgpu_acr_dbg(g, "fetching GMMU regs\n");
|
||||
err = g->ops.fb.vpr_info_fetch(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fb.vpr_info_fetch failed err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_falcon_init_ctxsw_ucode(g, gr_falcon);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
g->acr->get_wpr_info(g, &wpr_inf);
|
||||
nvgpu_acr_dbg(g, "wpr carveout base:%llx\n", wpr_inf.wpr_base);
|
||||
nvgpu_acr_dbg(g, "wpr carveout size :%llx\n", wpr_inf.size);
|
||||
|
||||
/* Discover all managed falcons*/
|
||||
err = lsfm_discover_ucode_images(g, plsfm);
|
||||
nvgpu_acr_dbg(g, " Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt);
|
||||
if (err != 0) {
|
||||
goto exit_err;
|
||||
}
|
||||
|
||||
if ((plsfm->managed_flcn_cnt != 0U) &&
|
||||
(g->acr->ucode_blob.cpu_va == NULL)) {
|
||||
/* Generate WPR requirements */
|
||||
err = lsf_gen_wpr_requirements(g, plsfm);
|
||||
if (err != 0) {
|
||||
goto exit_err;
|
||||
}
|
||||
|
||||
/* Alloc memory to hold ucode blob contents */
|
||||
err = g->acr->alloc_blob_space(g, plsfm->wpr_size
|
||||
, &g->acr->ucode_blob);
|
||||
if (err != 0) {
|
||||
goto exit_err;
|
||||
}
|
||||
|
||||
nvgpu_acr_dbg(g, "managed LS falcon %d, WPR size %d bytes.\n",
|
||||
plsfm->managed_flcn_cnt, plsfm->wpr_size);
|
||||
err = lsfm_init_wpr_contents(g, plsfm, &g->acr->ucode_blob);
|
||||
if (err != 0) {
|
||||
nvgpu_kfree(g, &g->acr->ucode_blob);
|
||||
goto free_acr;
|
||||
}
|
||||
} else {
|
||||
nvgpu_acr_dbg(g, "LSFM is managing no falcons.\n");
|
||||
}
|
||||
nvgpu_acr_dbg(g, "prepare ucode blob return 0\n");
|
||||
|
||||
free_acr:
|
||||
free_acr_resources(g, plsfm);
|
||||
exit_err:
|
||||
return err;
|
||||
}
|
||||
207
drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.h
Normal file
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_BLOB_CONSTRUCT_V0_H
|
||||
#define ACR_BLOB_CONSTRUCT_V0_H
|
||||
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/flcnif_cmn.h>
|
||||
|
||||
/*
|
||||
* Light Secure WPR Content Alignments
|
||||
*/
|
||||
#define LSF_WPR_HEADER_ALIGNMENT (256U)
|
||||
#define LSF_SUB_WPR_HEADER_ALIGNMENT (256U)
|
||||
#define LSF_LSB_HEADER_ALIGNMENT (256U)
|
||||
#define LSF_BL_DATA_ALIGNMENT (256U)
|
||||
#define LSF_BL_DATA_SIZE_ALIGNMENT (256U)
|
||||
#define LSF_BL_CODE_SIZE_ALIGNMENT (256U)
|
||||
#define LSF_DATA_SIZE_ALIGNMENT (256U)
|
||||
#define LSF_CODE_SIZE_ALIGNMENT (256U)
|
||||
|
||||
#define LSF_UCODE_DATA_ALIGNMENT 4096U
|
||||
|
||||
|
||||
/* Defined for 1MB and 4KB alignment */
|
||||
#define SHIFT_1MB (20U)
|
||||
#define SHIFT_4KB (12U)
|
||||
|
||||
/*Light Secure Bootstrap header related defines*/
|
||||
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0U
|
||||
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE BIT32(0)
|
||||
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0U
|
||||
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE BIT32(2)
|
||||
#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE BIT32(3)
|
||||
#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE (0U)
|
||||
|
||||
/*
|
||||
* Image Status Defines
|
||||
*/
|
||||
#define LSF_IMAGE_STATUS_NONE (0U)
|
||||
#define LSF_IMAGE_STATUS_COPY (1U)
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED (2U)
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED (3U)
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_DONE (4U)
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED (5U)
|
||||
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (6U)
|
||||
|
||||
/*
|
||||
* Light Secure WPR Header
|
||||
* Defines state allowing Light Secure Falcon bootstrapping.
|
||||
*/
|
||||
struct lsf_wpr_header_v0 {
|
||||
u32 falcon_id;
|
||||
u32 lsb_offset;
|
||||
u32 bootstrap_owner;
|
||||
u32 lazy_bootstrap;
|
||||
u32 status;
|
||||
};
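/*
 * Illustrative sketch (not part of the driver): the blob written by
 * lsfm_init_wpr_contents() begins with an array of these headers, one per
 * managed falcon, terminated by an entry whose falcon_id is
 * FALCON_ID_INVALID. Assuming the header array has been read back into
 * sysmem as "hdrs", a consumer could count the managed falcons like this:
 *
 *	static u32 count_managed_falcons(const struct lsf_wpr_header_v0 *hdrs)
 *	{
 *		u32 n = 0U;
 *
 *		while (hdrs[n].falcon_id != FALCON_ID_INVALID) {
 *			n++;
 *		}
 *		return n;
 *	}
 */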
|
||||
|
||||
/*
|
||||
* Light Secure Falcon Ucode Description Defines
|
||||
* This structure is preliminary and may change as the ucode signing flow evolves.
|
||||
*/
|
||||
struct lsf_ucode_desc_v0 {
|
||||
u8 prd_keys[2][16];
|
||||
u8 dbg_keys[2][16];
|
||||
u32 b_prd_present;
|
||||
u32 b_dbg_present;
|
||||
u32 falcon_id;
|
||||
};
|
||||
|
||||
/*
|
||||
* Light Secure Bootstrap Header
|
||||
* Defines state allowing Light Secure Falcon bootstrapping.
|
||||
*/
|
||||
struct lsf_lsb_header_v0 {
|
||||
struct lsf_ucode_desc_v0 signature;
|
||||
u32 ucode_off;
|
||||
u32 ucode_size;
|
||||
u32 data_size;
|
||||
u32 bl_code_size;
|
||||
u32 bl_imem_off;
|
||||
u32 bl_data_off;
|
||||
u32 bl_data_size;
|
||||
u32 app_code_off;
|
||||
u32 app_code_size;
|
||||
u32 app_data_off;
|
||||
u32 app_data_size;
|
||||
u32 flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* Union of all supported structures used by bootloaders.
|
||||
*/
|
||||
/* Falcon BL interfaces */
|
||||
/*
|
||||
* Structure used by the boot-loader to load the rest of the code. This has
|
||||
* to be filled by NVGPU and copied into DMEM at offset provided in the
|
||||
* hsflcn_bl_desc.bl_desc_dmem_load_off.
|
||||
*/
|
||||
struct flcn_bl_dmem_desc_v0 {
|
||||
u32 reserved[4]; /*Should be the first element..*/
|
||||
u32 signature[4]; /* Follows the reserved words. */
|
||||
u32 ctx_dma;
|
||||
u32 code_dma_base;
|
||||
u32 non_sec_code_off;
|
||||
u32 non_sec_code_size;
|
||||
u32 sec_code_off;
|
||||
u32 sec_code_size;
|
||||
u32 code_entry_point;
|
||||
u32 data_dma_base;
|
||||
u32 data_size;
|
||||
u32 code_dma_base1;
|
||||
u32 data_dma_base1;
|
||||
};
|
||||
|
||||
/*
|
||||
* Legacy structure used by the current PMU bootloader.
|
||||
*/
|
||||
struct loader_config {
|
||||
u32 dma_idx;
|
||||
u32 code_dma_base; /* upper 32-bits of 40-bit dma address */
|
||||
u32 code_size_total;
|
||||
u32 code_size_to_load;
|
||||
u32 code_entry_point;
|
||||
u32 data_dma_base; /* upper 32-bits of 40-bit dma address */
|
||||
u32 data_size; /* initialized data of the application */
|
||||
u32 overlay_dma_base; /* upper 32-bits of the 40-bit dma address */
|
||||
u32 argc;
|
||||
u32 argv;
|
||||
u16 code_dma_base1; /* upper 7 bits of 47-bit dma address */
|
||||
u16 data_dma_base1; /* upper 7 bits of 47-bit dma address */
|
||||
u16 overlay_dma_base1; /* upper 7 bits of the 47-bit dma address */
|
||||
};
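/*
 * Illustrative sketch (an assumption inferred from the ">> 8U" shifts used
 * elsewhere in this file, not a statement of the hardware contract): DMA
 * base addresses appear to be stored in 256-byte units, with the low 32
 * bits of the shifted value in code_dma_base and the remaining upper bits
 * in code_dma_base1:
 *
 *	static void split_dma_base(u64 addr, u32 *base, u16 *base1)
 *	{
 *		u64 shifted = addr >> 8;	// 256-byte units
 *
 *		*base = (u32)(shifted & 0xffffffffULL);
 *		*base1 = (u16)(shifted >> 32);	// upper bits of a 47-bit address
 *	}
 */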
|
||||
|
||||
union flcn_bl_generic_desc {
|
||||
struct flcn_bl_dmem_desc_v0 bl_dmem_desc;
|
||||
struct loader_config loader_cfg;
|
||||
};
|
||||
|
||||
struct flcn_ucode_img_v0 {
|
||||
u32 *data;
|
||||
struct pmu_ucode_desc *desc; /* only some falcons have descriptor */
|
||||
u32 data_size;
|
||||
/* NULL if not a light secure falcon. */
|
||||
struct lsf_ucode_desc_v0 *lsf_desc;
|
||||
/* True if there are resources to be freed by the client. */
|
||||
};
|
||||
|
||||
/*
|
||||
* LSFM Managed Ucode Image
|
||||
* next : Next image in the list, NULL if last.
|
||||
* wpr_header : WPR header for this ucode image
|
||||
* lsb_header : LSB header for this ucode image
|
||||
* bl_gen_desc : Bootloader generic desc structure for this ucode image
|
||||
* bl_gen_desc_size : Size of the bootloader desc structure for this ucode image
|
||||
* full_ucode_size : Surface size required for final ucode image
|
||||
* ucode_img : Ucode image info
|
||||
*/
|
||||
struct lsfm_managed_ucode_img_v0 {
|
||||
struct lsfm_managed_ucode_img_v0 *next;
|
||||
struct lsf_wpr_header_v0 wpr_header;
|
||||
struct lsf_lsb_header_v0 lsb_header;
|
||||
union flcn_bl_generic_desc bl_gen_desc;
|
||||
u32 bl_gen_desc_size;
|
||||
u32 full_ucode_size;
|
||||
struct flcn_ucode_img_v0 ucode_img;
|
||||
};
|
||||
|
||||
/*
|
||||
* Defines the structure used to contain all generic information related to
|
||||
* the LSFM.
|
||||
*
|
||||
* Contains the Light Secure Falcon Manager (LSFM) feature related data.
|
||||
*/
|
||||
struct ls_flcn_mgr_v0 {
|
||||
u16 managed_flcn_cnt;
|
||||
u32 wpr_size;
|
||||
struct lsfm_managed_ucode_img_v0 *ucode_img_list;
|
||||
};
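/*
 * Illustrative sketch (not the driver's actual discovery code; the real
 * flow is in lsfm_discover_ucode_images()): managed images sit on the
 * singly linked ucode_img_list, so linking in a new node and counting it
 * would look roughly like this:
 *
 *	static void lsfm_add_managed_img(struct ls_flcn_mgr_v0 *plsfm,
 *		struct lsfm_managed_ucode_img_v0 *pnode)
 *	{
 *		pnode->next = plsfm->ucode_img_list;
 *		plsfm->ucode_img_list = pnode;
 *		plsfm->managed_flcn_cnt++;
 *	}
 */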
|
||||
|
||||
int nvgpu_acr_lsf_pmu_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img);
|
||||
int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img);
|
||||
int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img);
|
||||
|
||||
int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g);
|
||||
|
||||
#endif /* ACR_BLOB_CONSTRUCT_V0_H */
|
||||
254
drivers/gpu/nvgpu/common/acr/acr_bootstrap.c
Normal file
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/pmu.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/acr.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/soc.h>
|
||||
|
||||
#include "acr_bootstrap.h"
|
||||
#include "acr_priv.h"
|
||||
|
||||
int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc,
|
||||
u32 timeout)
|
||||
{
|
||||
u32 flcn_id;
|
||||
#ifdef CONFIG_NVGPU_FALCON_NON_FUSA
|
||||
u32 sctl, cpuctl;
|
||||
#endif
|
||||
int completion = 0;
|
||||
u32 data = 0;
|
||||
u32 bar0_status = 0;
|
||||
u32 error_type;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn);
|
||||
|
||||
completion = nvgpu_falcon_wait_for_halt(acr_desc->acr_flcn, timeout);
|
||||
if (completion != 0) {
|
||||
nvgpu_err(g, "flcn-%d: HS ucode boot timed out, limit: %d ms",
|
||||
flcn_id, timeout);
|
||||
error_type = ACR_BOOT_TIMEDOUT;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (acr_desc->acr_engine_bus_err_status != NULL) {
|
||||
completion = acr_desc->acr_engine_bus_err_status(g,
|
||||
&bar0_status, &error_type);
|
||||
if (completion != 0) {
|
||||
nvgpu_err(g, "flcn-%d: ACR engine bus error", flcn_id);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
data = nvgpu_falcon_mailbox_read(acr_desc->acr_flcn, FALCON_MAILBOX_0);
|
||||
if (data != 0U) {
|
||||
nvgpu_err(g, "flcn-%d: HS ucode boot failed, err %x", flcn_id,
|
||||
data);
|
||||
nvgpu_err(g, "flcn-%d: Mailbox-1 : 0x%x", flcn_id,
|
||||
nvgpu_falcon_mailbox_read(acr_desc->acr_flcn,
|
||||
FALCON_MAILBOX_1));
|
||||
completion = -EAGAIN;
|
||||
error_type = ACR_BOOT_FAILED;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* When engine-falcon is used for ACR bootstrap, validate the integrity
|
||||
* of falcon IMEM and DMEM.
|
||||
*/
|
||||
if (acr_desc->acr_validate_mem_integrity != NULL) {
|
||||
if (!acr_desc->acr_validate_mem_integrity(g)) {
|
||||
nvgpu_err(g, "flcn-%d: memcheck failed", flcn_id);
|
||||
completion = -EAGAIN;
|
||||
error_type = ACR_BOOT_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
|
||||
#ifdef CONFIG_NVGPU_FALCON_NON_FUSA
|
||||
nvgpu_falcon_get_ctls(acr_desc->acr_flcn, &sctl, &cpuctl);
|
||||
|
||||
nvgpu_acr_dbg(g, "flcn-%d: sctl reg %x cpuctl reg %x",
|
||||
flcn_id, sctl, cpuctl);
|
||||
#endif
|
||||
|
||||
if (completion != 0) {
|
||||
#ifdef CONFIG_NVGPU_FALCON_DEBUG
|
||||
nvgpu_falcon_dump_stats(acr_desc->acr_flcn);
|
||||
#endif
|
||||
if (acr_desc->report_acr_engine_bus_err_status != NULL) {
|
||||
acr_desc->report_acr_engine_bus_err_status(g,
|
||||
bar0_status, error_type);
|
||||
}
|
||||
}
|
||||
|
||||
return completion;
|
||||
}
|
||||
|
||||
/*
|
||||
* Patch signatures into ucode image
|
||||
*/
|
||||
static void acr_ucode_patch_sig(struct gk20a *g,
|
||||
unsigned int *p_img, unsigned int *p_prod_sig,
|
||||
unsigned int *p_dbg_sig, unsigned int *p_patch_loc,
|
||||
unsigned int *p_patch_ind, u32 sig_size)
|
||||
{
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
struct nvgpu_acr *acr = g->acr;
|
||||
#endif
|
||||
unsigned int i, j, *p_sig;
|
||||
const u32 dmem_word_size = 4U;
|
||||
nvgpu_acr_dbg(g, " ");
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
p_sig = p_prod_sig;
|
||||
nvgpu_acr_dbg(g, "PRODUCTION MODE\n");
|
||||
} else {
|
||||
p_sig = p_dbg_sig;
|
||||
nvgpu_info(g, "DEBUG MODE\n");
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
if (acr->get_versioned_sig != NULL) {
|
||||
p_sig = acr->get_versioned_sig(g, acr, p_sig, &sig_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Patching logic: copy the selected signature words into the image at each patch location. */
|
||||
sig_size = sig_size / dmem_word_size;
|
||||
for (i = 0U; i < (sizeof(*p_patch_loc) / dmem_word_size); i++) {
|
||||
for (j = 0U; j < sig_size; j++) {
|
||||
p_img[nvgpu_safe_add_u32(
|
||||
(p_patch_loc[i] / dmem_word_size), j)] =
|
||||
p_sig[nvgpu_safe_add_u32(
|
||||
(p_patch_ind[i] * dmem_word_size), j)];
|
||||
}
|
||||
}
|
||||
}
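/*
 * Worked example (illustrative only, with assumed values): the loops above
 * copy sig_size/4 words, and the outer loop runs sizeof(*p_patch_loc)/4 = 1
 * time. With a 16-byte signature, p_patch_loc[0] = 0x100 and
 * p_patch_ind[0] = 1, the copy reduces to
 *
 *	p_img[0x100/4 + j] = p_sig[1*4 + j];	for j = 0..3
 *
 * i.e. image words 64..67 receive signature words 4..7.
 */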
|
||||
|
||||
/*
|
||||
* Loads ACR bin to SYSMEM/FB and bootstraps ACR with bootloader code
|
||||
* start and end are addresses of ucode blob in non-WPR region
|
||||
*/
|
||||
int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
struct hs_acr *acr_desc)
|
||||
{
|
||||
struct nvgpu_firmware *acr_fw = acr_desc->acr_fw;
|
||||
struct bin_hdr *hs_bin_hdr = NULL;
|
||||
struct acr_fw_header *fw_hdr = NULL;
|
||||
u32 *ucode_header = NULL;
|
||||
u32 *ucode = NULL;
|
||||
u32 timeout = 0;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_acr_dbg(g, "ACR TYPE %x ", acr_desc->acr_type);
|
||||
|
||||
if (acr_fw != NULL) {
|
||||
err = acr->patch_wpr_info_to_ucode(g, acr, acr_desc, true);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Falcon ucode patch wpr info failed");
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
/* Firmware is stored in soc specific path in FMODEL
|
||||
* Hence NVGPU_REQUEST_FIRMWARE_NO_WARN is used instead
|
||||
* of NVGPU_REQUEST_FIRMWARE_NO_SOC
|
||||
*/
|
||||
#ifdef CONFIG_NVGPU_SIM
|
||||
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
|
||||
acr_fw = nvgpu_request_firmware(g,
|
||||
acr_desc->acr_fw_name,
|
||||
NVGPU_REQUEST_FIRMWARE_NO_WARN);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
acr_fw = nvgpu_request_firmware(g,
|
||||
acr_desc->acr_fw_name,
|
||||
NVGPU_REQUEST_FIRMWARE_NO_SOC);
|
||||
}
|
||||
if (acr_fw == NULL) {
|
||||
nvgpu_err(g, "%s ucode get fail for %s",
|
||||
acr_desc->acr_fw_name, g->name);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
acr_desc->acr_fw = acr_fw;
|
||||
|
||||
err = acr->patch_wpr_info_to_ucode(g, acr, acr_desc, false);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Falcon ucode patch wpr info failed");
|
||||
goto err_free_ucode;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
hs_bin_hdr = (struct bin_hdr *)(void *)acr_fw->data;
|
||||
fw_hdr = (struct acr_fw_header *)(void *)(acr_fw->data +
|
||||
hs_bin_hdr->header_offset);
|
||||
ucode_header = (u32 *)(void *)(acr_fw->data + fw_hdr->hdr_offset);
|
||||
ucode = (u32 *)(void *)(acr_fw->data + hs_bin_hdr->data_offset);
|
||||
|
||||
/* Patch Ucode signatures */
|
||||
acr_ucode_patch_sig(g, ucode,
|
||||
(u32 *)(void *)(acr_fw->data + fw_hdr->sig_prod_offset),
|
||||
(u32 *)(void *)(acr_fw->data + fw_hdr->sig_dbg_offset),
|
||||
(u32 *)(void *)(acr_fw->data + fw_hdr->patch_loc),
|
||||
(u32 *)(void *)(acr_fw->data + fw_hdr->patch_sig),
|
||||
fw_hdr->sig_dbg_size);
|
||||
|
||||
err = nvgpu_falcon_hs_ucode_load_bootstrap(acr_desc->acr_flcn,
|
||||
ucode, ucode_header);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "HS ucode load & bootstrap failed");
|
||||
goto err_free_ucode;
|
||||
}
|
||||
|
||||
/* wait for completion & halt */
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
timeout = ACR_COMPLETION_TIMEOUT_SILICON_MS;
|
||||
} else {
|
||||
timeout = ACR_COMPLETION_TIMEOUT_NON_SILICON_MS;
|
||||
}
|
||||
err = nvgpu_acr_wait_for_completion(g, acr_desc, timeout);
|
||||
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "HS ucode completion err %d", err);
|
||||
goto err_free_ucode;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_ucode:
|
||||
nvgpu_release_firmware(g, acr_fw);
|
||||
acr_desc->acr_fw = NULL;
|
||||
return err;
|
||||
}
|
||||
139
drivers/gpu/nvgpu/common/acr/acr_bootstrap.h
Normal file
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_BOOTSTRAP_H
|
||||
#define ACR_BOOTSTRAP_H
|
||||
|
||||
#include "nvgpu_acr_interface.h"
|
||||
#ifdef CONFIG_NVGPU_NEXT
|
||||
#include "common/acr/nvgpu_next_acr_bootstrap.h"
|
||||
#endif
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
|
||||
struct flcn_acr_region_prop_v0 {
|
||||
u32 start_addr;
|
||||
u32 end_addr;
|
||||
u32 region_id;
|
||||
u32 read_mask;
|
||||
u32 write_mask;
|
||||
u32 client_mask;
|
||||
};
|
||||
|
||||
struct flcn_acr_regions_v0 {
|
||||
u32 no_regions;
|
||||
struct flcn_acr_region_prop_v0 region_props[NVGPU_FLCN_ACR_MAX_REGIONS];
|
||||
};
|
||||
|
||||
struct flcn_acr_desc_v0 {
|
||||
union {
|
||||
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
|
||||
u32 signatures[4];
|
||||
} ucode_reserved_space;
|
||||
/*Always 1st*/
|
||||
u32 wpr_region_id;
|
||||
u32 wpr_offset;
|
||||
u32 mmu_mem_range;
|
||||
struct flcn_acr_regions_v0 regions;
|
||||
u32 nonwpr_ucode_blob_size;
|
||||
u64 nonwpr_ucode_blob_start;
|
||||
};
|
||||
|
||||
struct bin_hdr {
|
||||
/* 0x10de */
|
||||
u32 bin_magic;
|
||||
/* versioning of bin format */
|
||||
u32 bin_ver;
|
||||
/* Entire image size including this header */
|
||||
u32 bin_size;
|
||||
/*
|
||||
* Header offset of executable binary metadata,
|
||||
* start @ offset- 0x100 *
|
||||
*/
|
||||
u32 header_offset;
|
||||
/*
|
||||
* Start of executable binary data, start @
|
||||
* offset- 0x200
|
||||
*/
|
||||
u32 data_offset;
|
||||
/* Size of executable binary */
|
||||
u32 data_size;
|
||||
};
|
||||
|
||||
struct acr_fw_header {
|
||||
u32 sig_dbg_offset;
|
||||
u32 sig_dbg_size;
|
||||
u32 sig_prod_offset;
|
||||
u32 sig_prod_size;
|
||||
u32 patch_loc;
|
||||
u32 patch_sig;
|
||||
u32 hdr_offset; /* This header points to acr_ucode_header_t210_load */
|
||||
u32 hdr_size; /* Size of above header */
|
||||
};
|
||||
|
||||
/* ACR Falcon descriptors */
|
||||
struct hs_acr {
|
||||
#define ACR_DEFAULT 0U
|
||||
#define ACR_AHESASC_NON_FUSA 1U
|
||||
#define ACR_ASB_NON_FUSA 2U
|
||||
#define ACR_AHESASC_FUSA 3U
|
||||
#define ACR_ASB_FUSA 4U
|
||||
u32 acr_type;
|
||||
|
||||
/* ACR ucode */
|
||||
const char *acr_fw_name;
|
||||
const char *acr_code_name;
|
||||
const char *acr_data_name;
|
||||
const char *acr_manifest_name;
|
||||
struct nvgpu_firmware *code_fw;
|
||||
struct nvgpu_firmware *data_fw;
|
||||
struct nvgpu_firmware *manifest_fw;
|
||||
struct nvgpu_firmware *acr_fw;
|
||||
|
||||
union{
|
||||
struct flcn_acr_desc_v0 *acr_dmem_desc_v0;
|
||||
struct flcn_acr_desc *acr_dmem_desc;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
struct nvgpu_mem acr_falcon2_sysmem_desc;
|
||||
struct flcn2_acr_desc acr_sysmem_desc;
|
||||
struct nvgpu_mem ls_pmu_desc;
|
||||
#endif
|
||||
|
||||
/* Falcon used to execute ACR ucode */
|
||||
struct nvgpu_falcon *acr_flcn;
|
||||
|
||||
void (*report_acr_engine_bus_err_status)(struct gk20a *g,
|
||||
u32 bar0_status, u32 error_type);
|
||||
int (*acr_engine_bus_err_status)(struct gk20a *g, u32 *bar0_status,
|
||||
u32 *error_type);
|
||||
bool (*acr_validate_mem_integrity)(struct gk20a *g);
|
||||
};
|
||||
|
||||
int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc,
|
||||
u32 timeout);
|
||||
int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
struct hs_acr *acr_desc);
|
||||
|
||||
#endif /* ACR_BOOTSTRAP_H */
|
||||
161
drivers/gpu/nvgpu/common/acr/acr_priv.h
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_H
|
||||
#define ACR_H
|
||||
|
||||
#include "acr_bootstrap.h"
|
||||
#ifdef CONFIG_NVGPU_ACR_LEGACY
|
||||
#include "acr_blob_construct_v0.h"
|
||||
#endif
|
||||
#include "acr_blob_construct.h"
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
struct wpr_carveout_info;
|
||||
|
||||
#define nvgpu_acr_dbg(g, fmt, args...) \
|
||||
nvgpu_log(g, gpu_dbg_pmu, fmt, ##args)
|
||||
|
||||
/*
|
||||
* Falcon UCODE header index.
|
||||
*/
|
||||
#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0U)
|
||||
#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1U)
|
||||
#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2U)
|
||||
#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3U)
|
||||
#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4U)
|
||||
|
||||
/*
|
||||
* There are a total of N apps, with code and data offsets defined in the UCODE header.
* These macros provide the CODE and DATA offset and size of the A'th application.
|
||||
*/
|
||||
#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5U)
|
||||
#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \
|
||||
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((A)*2U))
|
||||
#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \
|
||||
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((A)*2U) + 1U)
|
||||
#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \
|
||||
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + ((N)*2U) - 1U)
|
||||
|
||||
#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \
|
||||
(FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1U)
|
||||
#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \
|
||||
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((A)*2U))
|
||||
#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \
|
||||
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((A)*2U) + 1U)
|
||||
#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \
|
||||
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + ((N)*2U) - 1U)
|
||||
|
||||
#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \
|
||||
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1U)
|
||||
#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \
|
||||
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2U)
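/*
 * Worked example (illustrative only, FLCN_NL_UCODE_HDR_ prefix omitted):
 * for a header describing N = 2 apps the word indices evaluate to
 *
 *	APP_CODE_OFF_IND(2, 0) = 5,  APP_CODE_SIZE_IND(2, 0) = 6
 *	APP_CODE_OFF_IND(2, 1) = 7,  APP_CODE_SIZE_IND(2, 1) = 8
 *	APP_DATA_OFF_IND(2, 0) = 9,  APP_DATA_SIZE_IND(2, 0) = 10
 *	APP_DATA_OFF_IND(2, 1) = 11, APP_DATA_SIZE_IND(2, 1) = 12
 *	OS_OVL_OFF_IND(2) = 13,      OS_OVL_SIZE_IND(2) = 14
 *
 * i.e. words 0-4 hold the OS code/data offsets, sizes and the app count,
 * followed by 2*N words of per-app code info, 2*N words of per-app data
 * info, and finally the overlay offset and size.
 */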
|
||||
|
||||
#define GM20B_HSBIN_ACR_PROD_UCODE "nv_acr_ucode_prod.bin"
|
||||
#define GM20B_HSBIN_ACR_DBG_UCODE "nv_acr_ucode_dbg.bin"
|
||||
#define HSBIN_ACR_BL_UCODE_IMAGE "pmu_bl.bin"
|
||||
#define HSBIN_ACR_PROD_UCODE "acr_ucode_prod.bin"
|
||||
#define HSBIN_ACR_DBG_UCODE "acr_ucode_dbg.bin"
|
||||
#define HSBIN_ACR_AHESASC_NON_FUSA_PROD_UCODE "acr_ahesasc_prod_ucode.bin"
|
||||
#define HSBIN_ACR_ASB_NON_FUSA_PROD_UCODE "acr_asb_prod_ucode.bin"
|
||||
#define HSBIN_ACR_AHESASC_NON_FUSA_DBG_UCODE "acr_ahesasc_dbg_ucode.bin"
|
||||
#define HSBIN_ACR_ASB_NON_FUSA_DBG_UCODE "acr_asb_dbg_ucode.bin"
|
||||
|
||||
#define HSBIN_ACR_AHESASC_FUSA_PROD_UCODE "acr_ahesasc_fusa_prod_ucode.bin"
|
||||
#define HSBIN_ACR_ASB_FUSA_PROD_UCODE "acr_asb_fusa_prod_ucode.bin"
|
||||
#define HSBIN_ACR_AHESASC_FUSA_DBG_UCODE "acr_ahesasc_fusa_dbg_ucode.bin"
|
||||
#define HSBIN_ACR_ASB_FUSA_DBG_UCODE "acr_asb_fusa_dbg_ucode.bin"
|
||||
|
||||
#define GM20B_FECS_UCODE_SIG "fecs_sig.bin"
|
||||
#define T18x_GPCCS_UCODE_SIG "gpccs_sig.bin"
|
||||
|
||||
#define TU104_FECS_UCODE_SIG "tu104/fecs_sig.bin"
|
||||
#define TU104_GPCCS_UCODE_SIG "tu104/gpccs_sig.bin"
|
||||
|
||||
#define LSF_SEC2_UCODE_IMAGE_BIN "sec2_ucode_image.bin"
|
||||
#define LSF_SEC2_UCODE_DESC_BIN "sec2_ucode_desc.bin"
|
||||
#define LSF_SEC2_UCODE_SIG_BIN "sec2_sig.bin"
|
||||
|
||||
#define LSF_SEC2_UCODE_IMAGE_FUSA_BIN "sec2_ucode_fusa_image.bin"
|
||||
#define LSF_SEC2_UCODE_DESC_FUSA_BIN "sec2_ucode_fusa_desc.bin"
|
||||
#define LSF_SEC2_UCODE_SIG_FUSA_BIN "sec2_fusa_sig.bin"
|
||||
|
||||
#define ACR_COMPLETION_TIMEOUT_NON_SILICON_MS 10000U /*in msec */
|
||||
#define ACR_COMPLETION_TIMEOUT_SILICON_MS 100 /*in msec */
|
||||
|
||||
struct acr_lsf_config {
|
||||
u32 falcon_id;
|
||||
u32 falcon_dma_idx;
|
||||
bool is_lazy_bootstrap;
|
||||
bool is_priv_load;
|
||||
|
||||
int (*get_lsf_ucode_details)(struct gk20a *g, void *lsf_ucode_img);
|
||||
void (*get_cmd_line_args_offset)(struct gk20a *g, u32 *args_offset);
|
||||
};
|
||||
|
||||
struct nvgpu_acr {
|
||||
struct gk20a *g;
|
||||
|
||||
u32 bootstrap_owner;
|
||||
u32 num_of_sig;
|
||||
|
||||
/* LSF properties */
|
||||
u64 lsf_enable_mask;
|
||||
struct acr_lsf_config lsf[FALCON_ID_END];
|
||||
|
||||
/*
|
||||
* non-wpr space to hold LSF ucodes,
|
||||
* ACR does copy ucode from non-wpr to wpr
|
||||
*/
|
||||
struct nvgpu_mem ucode_blob;
|
||||
/*
|
||||
* Even though this mem_desc wouldn't be used,
|
||||
* the wpr region needs to be reserved in the
|
||||
* allocator in dGPU case.
|
||||
*/
|
||||
struct nvgpu_mem wpr_dummy;
|
||||
|
||||
/* ACR member for different types of ucode */
|
||||
/* For older dGPU/Tegra ACR ucode */
|
||||
struct hs_acr acr;
|
||||
/* ACR load split feature support */
|
||||
struct hs_acr acr_ahesasc;
|
||||
struct hs_acr acr_asb;
|
||||
|
||||
/* ACR load split feature support for iGPU*/
|
||||
struct hs_acr acr_alsb;
|
||||
struct hs_acr acr_asc;
|
||||
|
||||
int (*prepare_ucode_blob)(struct gk20a *g);
|
||||
int (*alloc_blob_space)(struct gk20a *g, size_t size,
|
||||
struct nvgpu_mem *mem);
|
||||
int (*patch_wpr_info_to_ucode)(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
struct hs_acr *acr_desc, bool is_recovery);
|
||||
int (*bootstrap_hs_acr)(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
|
||||
void (*get_wpr_info)(struct gk20a *g, struct wpr_carveout_info *inf);
|
||||
u32* (*get_versioned_sig)(struct gk20a *g, struct nvgpu_acr *acr,
|
||||
u32 *sig, u32 *sig_size);
|
||||
};
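/*
 * Minimal usage sketch (illustrative only; error handling, locking and
 * recovery paths omitted): a chip-specific sw_init fills in these hooks,
 * after which a caller drives the ACR sequence roughly as
 *
 *	nvgpu_gm20b_acr_sw_init(g, g->acr);	// or another chip's sw_init
 *	err = g->acr->prepare_ucode_blob(g);	// build the non-WPR LSF blob
 *	if (err == 0) {
 *		err = g->acr->bootstrap_hs_acr(g, g->acr);
 *	}
 */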
|
||||
|
||||
#endif /* ACR_H */
|
||||
172
drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/pmu/fw.h>
|
||||
|
||||
#include "acr_wpr.h"
|
||||
#include "acr_priv.h"
|
||||
#include "acr_sw_gm20b.h"
|
||||
#include "acr_blob_alloc.h"
|
||||
#include "acr_bootstrap.h"
|
||||
#include "acr_blob_construct_v0.h"
|
||||
|
||||
static int gm20b_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR bootstrap failed");
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gm20b_acr_patch_wpr_info_to_ucode(struct gk20a *g,
|
||||
struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery)
|
||||
{
|
||||
struct nvgpu_firmware *acr_fw = acr_desc->acr_fw;
|
||||
struct acr_fw_header *acr_fw_hdr = NULL;
|
||||
struct bin_hdr *acr_fw_bin_hdr = NULL;
|
||||
struct flcn_acr_desc_v0 *acr_dmem_desc;
|
||||
u32 *acr_ucode_header = NULL;
|
||||
u32 *acr_ucode_data = NULL;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (is_recovery) {
|
||||
acr_desc->acr_dmem_desc_v0->nonwpr_ucode_blob_size = 0U;
|
||||
} else {
|
||||
acr_fw_bin_hdr = (struct bin_hdr *)acr_fw->data;
|
||||
acr_fw_hdr = (struct acr_fw_header *)
|
||||
(acr_fw->data + acr_fw_bin_hdr->header_offset);
|
||||
|
||||
acr_ucode_data = (u32 *)(acr_fw->data +
|
||||
acr_fw_bin_hdr->data_offset);
|
||||
|
||||
acr_ucode_header = (u32 *)(acr_fw->data +
|
||||
acr_fw_hdr->hdr_offset);
|
||||
|
||||
/* Patch WPR info to ucode */
|
||||
acr_dmem_desc = (struct flcn_acr_desc_v0 *)
|
||||
&(((u8 *)acr_ucode_data)[acr_ucode_header[2U]]);
|
||||
|
||||
acr_desc->acr_dmem_desc_v0 = acr_dmem_desc;
|
||||
|
||||
acr_dmem_desc->nonwpr_ucode_blob_start =
|
||||
nvgpu_mem_get_addr(g, &g->acr->ucode_blob);
|
||||
nvgpu_assert(g->acr->ucode_blob.size <= U32_MAX);
|
||||
acr_dmem_desc->nonwpr_ucode_blob_size =
|
||||
(u32)g->acr->ucode_blob.size;
|
||||
acr_dmem_desc->regions.no_regions = 1U;
|
||||
acr_dmem_desc->wpr_offset = 0U;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LSF static config functions */
|
||||
static u32 gm20b_acr_lsf_pmu(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* PMU LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_PMU;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = false;
|
||||
lsf->is_priv_load = false;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details_v0;
|
||||
lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset;
|
||||
#endif
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 gm20b_acr_lsf_fecs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* FECS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_FECS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = false;
|
||||
lsf->is_priv_load = false;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details_v0;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 gm20b_acr_lsf_config(struct gk20a *g,
|
||||
struct nvgpu_acr *acr)
|
||||
{
|
||||
u32 lsf_enable_mask = 0;
|
||||
|
||||
lsf_enable_mask |= gm20b_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]);
|
||||
lsf_enable_mask |= gm20b_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]);
|
||||
|
||||
return lsf_enable_mask;
|
||||
}
|
||||
|
||||
static void gm20b_acr_default_sw_init(struct gk20a *g, struct hs_acr *hs_acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
/* ACR HS ucode type & f/w name*/
|
||||
hs_acr->acr_type = ACR_DEFAULT;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
hs_acr->acr_fw_name = GM20B_HSBIN_ACR_PROD_UCODE;
|
||||
} else {
|
||||
hs_acr->acr_fw_name = GM20B_HSBIN_ACR_DBG_UCODE;
|
||||
}
|
||||
|
||||
/* set the falcon on which ACR needs to execute */
|
||||
hs_acr->acr_flcn = g->pmu->flcn;
|
||||
hs_acr->acr_engine_bus_err_status =
|
||||
g->ops.pmu.bar0_error_status;
|
||||
}
|
||||
|
||||
void nvgpu_gm20b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
acr->g = g;
|
||||
|
||||
acr->bootstrap_owner = FALCON_ID_PMU;
|
||||
|
||||
acr->lsf_enable_mask = gm20b_acr_lsf_config(g, acr);
|
||||
|
||||
gm20b_acr_default_sw_init(g, &acr->acr);
|
||||
|
||||
acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob_v0;
|
||||
acr->get_wpr_info = nvgpu_acr_wpr_info_sys;
|
||||
acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys;
|
||||
acr->bootstrap_hs_acr = gm20b_bootstrap_hs_acr;
|
||||
acr->patch_wpr_info_to_ucode =
|
||||
gm20b_acr_patch_wpr_info_to_ucode;
|
||||
}
|
||||
33
drivers/gpu/nvgpu/common/acr/acr_sw_gm20b.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* GM20B ACR
|
||||
*
|
||||
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_SW_GM20B_H
|
||||
#define ACR_SW_GM20B_H
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
|
||||
void nvgpu_gm20b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
|
||||
#endif /*ACR_SW_GM20B_H*/
|
||||
80
drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "acr_sw_gp10b.h"
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/pmu.h>
|
||||
|
||||
#include "acr_blob_construct_v0.h"
|
||||
#include "acr_priv.h"
|
||||
|
||||
#include "acr_sw_gm20b.h"
|
||||
#include "acr_sw_gp10b.h"
|
||||
|
||||
/* LSF static config functions */
|
||||
static u32 gp10b_acr_lsf_gpccs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* GPCCS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_GPCCS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = true;
|
||||
lsf->is_priv_load = true;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details_v0;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static void gp10b_acr_default_sw_init(struct gk20a *g, struct hs_acr *hs_acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
/* ACR HS ucode type & f/w name*/
|
||||
hs_acr->acr_type = ACR_DEFAULT;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
hs_acr->acr_fw_name = HSBIN_ACR_PROD_UCODE;
|
||||
} else {
|
||||
hs_acr->acr_fw_name = HSBIN_ACR_DBG_UCODE;
|
||||
}
|
||||
|
||||
/* set the falcon on which ACR needs to execute */
|
||||
hs_acr->acr_flcn = g->pmu->flcn;
|
||||
hs_acr->acr_engine_bus_err_status =
|
||||
g->ops.pmu.bar0_error_status;
|
||||
}
|
||||
|
||||
void nvgpu_gp10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
/* inherit the gm20b config data */
|
||||
nvgpu_gm20b_acr_sw_init(g, acr);
|
||||
gp10b_acr_default_sw_init(g, &acr->acr);
|
||||
|
||||
/* gp10b supports LSF gpccs bootstrap */
|
||||
acr->lsf_enable_mask |= gp10b_acr_lsf_gpccs(g,
|
||||
&acr->lsf[FALCON_ID_GPCCS]);
|
||||
}
|
||||
31
drivers/gpu/nvgpu/common/acr/acr_sw_gp10b.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_SW_GP10B_H
|
||||
#define ACR_SW_GP10B_H
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
|
||||
void nvgpu_gp10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
|
||||
#endif /* ACR_SW_GP10B_H */
|
||||
211
drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.c
Normal file
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/fw.h>
|
||||
#endif
|
||||
|
||||
#include "acr_wpr.h"
|
||||
#include "acr_priv.h"
|
||||
#include "acr_blob_alloc.h"
|
||||
#include "acr_blob_construct.h"
|
||||
#include "acr_bootstrap.h"
|
||||
#include "acr_sw_gv11b.h"
|
||||
|
||||
#define RECOVERY_UCODE_BLOB_SIZE (0U)
|
||||
#define WPR_OFFSET (0U)
|
||||
#define ACR_REGIONS (1U)
|
||||
|
||||
static int gv11b_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR bootstrap failed");
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gv11b_acr_patch_wpr_info_to_ucode(struct gk20a *g,
|
||||
struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery)
|
||||
{
|
||||
struct nvgpu_firmware *acr_fw = acr_desc->acr_fw;
|
||||
struct acr_fw_header *acr_fw_hdr = NULL;
|
||||
struct bin_hdr *acr_fw_bin_hdr = NULL;
|
||||
struct flcn_acr_desc *acr_dmem_desc;
|
||||
u32 *acr_ucode_header = NULL;
|
||||
u32 *acr_ucode_data = NULL;
|
||||
const u32 acr_desc_offset = 2U;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (is_recovery) {
|
||||
acr_desc->acr_dmem_desc->nonwpr_ucode_blob_size =
|
||||
RECOVERY_UCODE_BLOB_SIZE;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
acr_fw_bin_hdr = (struct bin_hdr *)(void *)acr_fw->data;
|
||||
acr_fw_hdr = (struct acr_fw_header *)(void *)
|
||||
(acr_fw->data + acr_fw_bin_hdr->header_offset);
|
||||
|
||||
acr_ucode_data = (u32 *)(void *)(acr_fw->data +
|
||||
acr_fw_bin_hdr->data_offset);
|
||||
acr_ucode_header = (u32 *)(void *)(acr_fw->data +
|
||||
acr_fw_hdr->hdr_offset);
|
||||
|
||||
/* Patch WPR info to ucode */
|
||||
acr_dmem_desc = (struct flcn_acr_desc *)(void *)
|
||||
&(((u8 *)acr_ucode_data)[acr_ucode_header[acr_desc_offset]]);
|
||||
|
||||
acr_desc->acr_dmem_desc = acr_dmem_desc;
|
||||
|
||||
acr_dmem_desc->nonwpr_ucode_blob_start =
|
||||
nvgpu_mem_get_addr(g, &g->acr->ucode_blob);
|
||||
nvgpu_assert(g->acr->ucode_blob.size <= U32_MAX);
|
||||
acr_dmem_desc->nonwpr_ucode_blob_size =
|
||||
(u32)g->acr->ucode_blob.size;
|
||||
acr_dmem_desc->regions.no_regions = ACR_REGIONS;
|
||||
acr_dmem_desc->wpr_offset = WPR_OFFSET;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LSF static config functions */
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
static u32 gv11b_acr_lsf_pmu(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
if (!g->support_ls_pmu) {
|
||||
/* skip adding LS PMU ucode to ACR blob */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* PMU LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_PMU;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = false;
|
||||
lsf->is_priv_load = false;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* LSF init */
|
||||
static u32 gv11b_acr_lsf_fecs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* FECS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_FECS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
/*
|
||||
* FECS LSF cold/recovery bootstrap is handled by ACR when LS PMU
|
||||
* is not present
|
||||
*/
|
||||
lsf->is_lazy_bootstrap = g->support_ls_pmu ? true : false;
|
||||
lsf->is_priv_load = false;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 gv11b_acr_lsf_gpccs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* GPCCS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_GPCCS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
/*
|
||||
* GPCCS LSF cold/recovery bootstrap is handled by ACR when LS PMU
|
||||
* is not present
|
||||
*/
|
||||
lsf->is_lazy_bootstrap = g->support_ls_pmu ? true : false;
|
||||
lsf->is_priv_load = true;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
u32 gv11b_acr_lsf_config(struct gk20a *g,
|
||||
struct nvgpu_acr *acr)
|
||||
{
|
||||
u32 lsf_enable_mask = 0;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
lsf_enable_mask |= gv11b_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]);
|
||||
#endif
|
||||
lsf_enable_mask |= gv11b_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]);
|
||||
lsf_enable_mask |= gv11b_acr_lsf_gpccs(g, &acr->lsf[FALCON_ID_GPCCS]);
|
||||
|
||||
return lsf_enable_mask;
|
||||
}
|
||||
|
||||
static void gv11b_acr_default_sw_init(struct gk20a *g, struct hs_acr *acr_desc)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
acr_desc->acr_type = ACR_DEFAULT;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
acr_desc->acr_fw_name = HSBIN_ACR_PROD_UCODE;
|
||||
} else {
|
||||
acr_desc->acr_fw_name = HSBIN_ACR_DBG_UCODE;
|
||||
}
|
||||
|
||||
acr_desc->acr_flcn = g->pmu->flcn;
|
||||
acr_desc->report_acr_engine_bus_err_status =
|
||||
nvgpu_pmu_report_bar0_pri_err_status;
|
||||
acr_desc->acr_engine_bus_err_status =
|
||||
g->ops.pmu.bar0_error_status;
|
||||
acr_desc->acr_validate_mem_integrity = g->ops.pmu.validate_mem_integrity;
|
||||
}
|
||||
|
||||
void nvgpu_gv11b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
acr->g = g;
|
||||
|
||||
acr->bootstrap_owner = FALCON_ID_PMU;
|
||||
|
||||
acr->lsf_enable_mask = gv11b_acr_lsf_config(g, acr);
|
||||
|
||||
gv11b_acr_default_sw_init(g, &acr->acr);
|
||||
|
||||
acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob;
|
||||
acr->get_wpr_info = nvgpu_acr_wpr_info_sys;
|
||||
acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys;
|
||||
acr->bootstrap_hs_acr = gv11b_bootstrap_hs_acr;
|
||||
acr->patch_wpr_info_to_ucode = gv11b_acr_patch_wpr_info_to_ucode;
|
||||
}
|
||||
34
drivers/gpu/nvgpu/common/acr/acr_sw_gv11b.h
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_SW_GV11B_H
|
||||
#define ACR_SW_GV11B_H
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
struct hs_acr;
|
||||
|
||||
void nvgpu_gv11b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
u32 gv11b_acr_lsf_config(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
|
||||
#endif /* ACR_SW_GV11B_H */
|
||||
|
||||
285
drivers/gpu/nvgpu/common/acr/acr_sw_tu104.c
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "acr_sw_tu104.h"
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
|
||||
#include "acr_wpr.h"
|
||||
#include "acr_priv.h"
|
||||
#include "acr_blob_alloc.h"
|
||||
#include "acr_bootstrap.h"
|
||||
#include "acr_blob_construct.h"
|
||||
#include "acr_sw_gv11b.h"
|
||||
#include "acr_sw_tu104.h"
|
||||
|
||||
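/*
 * TU104 boots the HS ACR in two stages: the AHESASC image on the SEC2
 * falcon first, then the ASB image on the GSP falcon. A failure in either
 * stage aborts the bootstrap.
 */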
static int tu104_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr_ahesasc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR AHESASC bootstrap failed");
|
||||
goto exit;
|
||||
}
|
||||
err = nvgpu_acr_bootstrap_hs_ucode(g, g->acr, &g->acr->acr_asb);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ACR ASB bootstrap failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* WPR info update */
|
||||
static int tu104_acr_patch_wpr_info_to_ucode(struct gk20a *g,
|
||||
struct nvgpu_acr *acr, struct hs_acr *acr_desc,
|
||||
bool is_recovery)
|
||||
{
|
||||
struct nvgpu_firmware *acr_fw = acr_desc->acr_fw;
|
||||
struct acr_fw_header *acr_fw_hdr = NULL;
|
||||
struct bin_hdr *acr_fw_bin_hdr = NULL;
|
||||
struct flcn_acr_desc *acr_dmem_desc;
|
||||
struct wpr_carveout_info wpr_inf;
|
||||
u32 *acr_ucode_header = NULL;
|
||||
u32 *acr_ucode_data = NULL;
|
||||
u64 tmp_addr;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
acr_fw_bin_hdr = (struct bin_hdr *)acr_fw->data;
|
||||
acr_fw_hdr = (struct acr_fw_header *)
|
||||
(acr_fw->data + acr_fw_bin_hdr->header_offset);
|
||||
|
||||
acr_ucode_data = (u32 *)(acr_fw->data + acr_fw_bin_hdr->data_offset);
|
||||
acr_ucode_header = (u32 *)(acr_fw->data + acr_fw_hdr->hdr_offset);
|
||||
|
||||
acr->get_wpr_info(g, &wpr_inf);
|
||||
|
||||
acr_dmem_desc = (struct flcn_acr_desc *)
|
||||
&(((u8 *)acr_ucode_data)[acr_ucode_header[2U]]);
|
||||
|
||||
acr_dmem_desc->nonwpr_ucode_blob_start = wpr_inf.nonwpr_base;
|
||||
nvgpu_assert(wpr_inf.size <= U32_MAX);
|
||||
acr_dmem_desc->nonwpr_ucode_blob_size = (u32)wpr_inf.size;
|
||||
acr_dmem_desc->regions.no_regions = 1U;
|
||||
acr_dmem_desc->wpr_offset = 0U;
|
||||
|
||||
acr_dmem_desc->wpr_region_id = 1U;
|
||||
acr_dmem_desc->regions.region_props[0U].region_id = 1U;
|
||||
|
||||
tmp_addr = (wpr_inf.wpr_base) >> 8U;
|
||||
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
|
||||
acr_dmem_desc->regions.region_props[0U].start_addr = U32(tmp_addr);
|
||||
|
||||
tmp_addr = ((wpr_inf.wpr_base) + wpr_inf.size) >> 8U;
|
||||
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
|
||||
acr_dmem_desc->regions.region_props[0U].end_addr = U32(tmp_addr);
|
||||
|
||||
tmp_addr = wpr_inf.nonwpr_base >> 8U;
|
||||
nvgpu_assert(u64_hi32(tmp_addr) == 0U);
|
||||
acr_dmem_desc->regions.region_props[0U].shadowmMem_startaddress =
|
||||
U32(tmp_addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* LSF init */
|
||||
static u32 tu104_acr_lsf_sec2(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* SEC2 LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_SEC2;
|
||||
lsf->falcon_dma_idx = NV_SEC2_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = false;
|
||||
lsf->is_priv_load = false;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_sec2_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 tu104_acr_lsf_pmu(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* PMU support not required until PSTATE support is enabled */
|
||||
if (!g->support_ls_pmu) {
|
||||
/* skip adding LS PMU ucode to ACR blob */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* PMU LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_PMU;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = false;
|
||||
lsf->is_priv_load = false;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_pmu_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = nvgpu_pmu_fw_get_cmd_line_args_offset;
|
||||
#endif
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 tu104_acr_lsf_fecs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* FECS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_FECS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = true;
|
||||
lsf->is_priv_load = true;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_fecs_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 tu104_acr_lsf_gpccs(struct gk20a *g,
|
||||
struct acr_lsf_config *lsf)
|
||||
{
|
||||
/* GPCCS LS falcon info */
|
||||
lsf->falcon_id = FALCON_ID_GPCCS;
|
||||
lsf->falcon_dma_idx = GK20A_PMU_DMAIDX_UCODE;
|
||||
lsf->is_lazy_bootstrap = true;
|
||||
lsf->is_priv_load = true;
|
||||
lsf->get_lsf_ucode_details = nvgpu_acr_lsf_gpccs_ucode_details;
|
||||
lsf->get_cmd_line_args_offset = NULL;
|
||||
|
||||
return BIT32(lsf->falcon_id);
|
||||
}
|
||||
|
||||
static u32 tu104_acr_lsf_config(struct gk20a *g,
|
||||
struct nvgpu_acr *acr)
|
||||
{
|
||||
u32 lsf_enable_mask = 0;
|
||||
lsf_enable_mask |= tu104_acr_lsf_pmu(g, &acr->lsf[FALCON_ID_PMU]);
|
||||
lsf_enable_mask |= tu104_acr_lsf_fecs(g, &acr->lsf[FALCON_ID_FECS]);
|
||||
lsf_enable_mask |= tu104_acr_lsf_gpccs(g, &acr->lsf[FALCON_ID_GPCCS]);
|
||||
lsf_enable_mask |= tu104_acr_lsf_sec2(g, &acr->lsf[FALCON_ID_SEC2]);
|
||||
|
||||
return lsf_enable_mask;
|
||||
}
|
||||
|
||||
/* fusa signing enable check */
|
||||
static bool tu104_acr_is_fusa_enabled(struct gk20a *g)
|
||||
{
|
||||
return g->is_fusa_sku;
|
||||
}
|
||||
|
||||
/* ACR-AHESASC(ACR hub encryption setter and signature checker) init*/
|
||||
static void tu104_acr_ahesasc_v0_ucode_select(struct gk20a *g,
|
||||
struct hs_acr *acr_ahesasc)
|
||||
{
|
||||
acr_ahesasc->acr_type = ACR_AHESASC_NON_FUSA;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_NON_FUSA_PROD_UCODE;
|
||||
} else {
|
||||
acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_NON_FUSA_DBG_UCODE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void tu104_acr_ahesasc_fusa_ucode_select(struct gk20a *g,
|
||||
struct hs_acr *acr_ahesasc)
|
||||
{
|
||||
acr_ahesasc->acr_type = ACR_AHESASC_FUSA;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_FUSA_PROD_UCODE;
|
||||
} else {
|
||||
acr_ahesasc->acr_fw_name = HSBIN_ACR_AHESASC_FUSA_DBG_UCODE;
|
||||
}
|
||||
}
|
||||
|
||||
static void tu104_acr_ahesasc_sw_init(struct gk20a *g,
|
||||
struct hs_acr *acr_ahesasc)
|
||||
{
|
||||
if (tu104_acr_is_fusa_enabled(g)) {
|
||||
tu104_acr_ahesasc_fusa_ucode_select(g, acr_ahesasc);
|
||||
} else {
|
||||
tu104_acr_ahesasc_v0_ucode_select(g, acr_ahesasc);
|
||||
}
|
||||
|
||||
acr_ahesasc->acr_flcn = &g->sec2.flcn;
|
||||
}
|
||||
|
||||
/* ACR-ASB(ACR SEC2 booter) init*/
|
||||
static void tu104_acr_asb_v0_ucode_select(struct gk20a *g,
|
||||
struct hs_acr *acr_asb)
|
||||
{
|
||||
acr_asb->acr_type = ACR_ASB_NON_FUSA;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
acr_asb->acr_fw_name = HSBIN_ACR_ASB_NON_FUSA_PROD_UCODE;
|
||||
} else {
|
||||
acr_asb->acr_fw_name = HSBIN_ACR_ASB_NON_FUSA_DBG_UCODE;
|
||||
}
|
||||
}
|
||||
|
||||
static void tu104_acr_asb_fusa_ucode_select(struct gk20a *g,
|
||||
struct hs_acr *acr_asb)
|
||||
{
|
||||
acr_asb->acr_type = ACR_ASB_FUSA;
|
||||
|
||||
if (!g->ops.pmu.is_debug_mode_enabled(g)) {
|
||||
acr_asb->acr_fw_name = HSBIN_ACR_ASB_FUSA_PROD_UCODE;
|
||||
} else {
|
||||
acr_asb->acr_fw_name = HSBIN_ACR_ASB_FUSA_DBG_UCODE;
|
||||
}
|
||||
}
|
||||
|
||||
static void tu104_acr_asb_sw_init(struct gk20a *g,
|
||||
struct hs_acr *acr_asb)
|
||||
{
|
||||
if (tu104_acr_is_fusa_enabled(g)) {
|
||||
tu104_acr_asb_fusa_ucode_select(g, acr_asb);
|
||||
} else {
|
||||
tu104_acr_asb_v0_ucode_select(g, acr_asb);
|
||||
}
|
||||
|
||||
acr_asb->acr_flcn = &g->gsp_flcn;
|
||||
}
|
||||
|
||||
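/*
 * TU104 ACR software state: the LS ucode blob is placed in vidmem, the
 * GSP falcon owns bootstrap, and the HS ACR is split into the AHESASC and
 * ASB images initialized below.
 */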
void nvgpu_tu104_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
acr->lsf_enable_mask = tu104_acr_lsf_config(g, acr);
|
||||
|
||||
acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob;
|
||||
acr->get_wpr_info = nvgpu_acr_wpr_info_vid;
|
||||
acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_vid;
|
||||
acr->bootstrap_owner = FALCON_ID_GSPLITE;
|
||||
acr->bootstrap_hs_acr = tu104_bootstrap_hs_acr;
|
||||
acr->patch_wpr_info_to_ucode = tu104_acr_patch_wpr_info_to_ucode;
|
||||
|
||||
/* Init ACR-AHESASC */
|
||||
tu104_acr_ahesasc_sw_init(g, &acr->acr_ahesasc);
|
||||
|
||||
/* Init ACR-ASB*/
|
||||
tu104_acr_asb_sw_init(g, &acr->acr_asb);
|
||||
}
|
||||
31
drivers/gpu/nvgpu/common/acr/acr_sw_tu104.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_SW_TU104_H
|
||||
#define ACR_SW_TU104_H
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_acr;
|
||||
|
||||
void nvgpu_tu104_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr);
|
||||
|
||||
#endif /* ACR_SW_TU104_H */
|
||||
43
drivers/gpu/nvgpu/common/acr/acr_wpr.c
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/dma.h>
|
||||
|
||||
#include "acr_wpr.h"
|
||||
|
||||
/* Both size and address of WPR need to be 128K-aligned */
|
||||
#define DGPU_WPR_SIZE 0x200000U
|
||||
|
||||
void nvgpu_acr_wpr_info_sys(struct gk20a *g, struct wpr_carveout_info *inf)
|
||||
{
|
||||
g->ops.fb.read_wpr_info(g, &inf->wpr_base, &inf->size);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
void nvgpu_acr_wpr_info_vid(struct gk20a *g, struct wpr_carveout_info *inf)
|
||||
{
|
||||
inf->wpr_base = g->mm.vidmem.bootstrap_base;
|
||||
inf->nonwpr_base = inf->wpr_base + DGPU_WPR_SIZE;
|
||||
inf->size = DGPU_WPR_SIZE;
|
||||
}
|
||||
#endif
|
||||
40
drivers/gpu/nvgpu/common/acr/acr_wpr.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACR_WPR_H
|
||||
#define ACR_WPR_H
|
||||
|
||||
struct gk20a;
|
||||
|
||||
struct wpr_carveout_info {
|
||||
u64 wpr_base;
|
||||
u64 nonwpr_base;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
void nvgpu_acr_wpr_info_sys(struct gk20a *g, struct wpr_carveout_info *inf);
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
void nvgpu_acr_wpr_info_vid(struct gk20a *g, struct wpr_carveout_info *inf);
|
||||
#endif
|
||||
|
||||
#endif /* ACR_WPR_H */
|
||||
609
drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h
Normal file
@@ -0,0 +1,609 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ACR_INTERFACE_H
|
||||
#define NVGPU_ACR_INTERFACE_H
|
||||
|
||||
/**
|
||||
* @defgroup NVGPURM_BLOB_CONSTRUCT blob construct
|
||||
*
|
||||
* Blob construct interfaces:
|
||||
* NVGPU creates the LS ucode blob in sysmem/FB non-WPR memory. LS ucodes
|
||||
* are read from the filesystem and added to the blob for the detected chip.
|
||||
* The structs below must be filled by NvGPU for each LS Falcon ucode
|
||||
* supported on the detected chip. Once filled, NvGPU copies them, together
|
||||
* with the ucode image, into the non-WPR blob in the pattern shown below.
|
||||
* The ACR HS ucode uses the LS ucode blob to authenticate and load each
|
||||
* LS ucode onto the respective engine's LS Falcon.
|
||||
*
|
||||
* + WPR header struct #lsf_wpr_header.
|
||||
* + LSB header struct #lsf_lsb_header.
|
||||
* + Boot loader struct #flcn_bl_dmem_desc.
|
||||
* + ucode image.
|
||||
*
|
||||
* + BLOB Pattern:
|
||||
* ---------------------------------------------
|
||||
* | LSF WPR HDR | LSF LSB HDR | BL desc | ucode |
|
||||
* ---------------------------------------------
|
||||
*/
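/*
 * Illustrative layout for a single managed LS falcon. The offsets are
 * examples only, chosen to satisfy the alignments defined below; they are
 * not taken from a real blob:
 *
 *   0x0000  lsf_wpr_header entries (one per managed falcon), 256 B aligned
 *   0x0100  lsf_lsb_header for this falcon, 256 B aligned
 *   0x0200  flcn_bl_dmem_desc (BL data), 256 B aligned
 *   0x1000  ucode image, 4 KiB aligned
 */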
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_BLOB_CONSTRUCT
|
||||
*/
|
||||
/** @{*/
|
||||
|
||||
/**
|
||||
* Light Secure WPR Content Alignments
|
||||
*/
|
||||
/** WPR header should be aligned to 256 bytes */
|
||||
#define LSF_WPR_HEADER_ALIGNMENT (256U)
|
||||
/** SUB WPR header should be aligned to 256 bytes */
|
||||
#define LSF_SUB_WPR_HEADER_ALIGNMENT (256U)
|
||||
/** LSB header should be aligned to 256 bytes */
|
||||
#define LSF_LSB_HEADER_ALIGNMENT (256U)
|
||||
/** BL DATA should be aligned to 256 bytes */
|
||||
#define LSF_BL_DATA_ALIGNMENT (256U)
|
||||
/** BL DATA size should be aligned to 256 bytes */
|
||||
#define LSF_BL_DATA_SIZE_ALIGNMENT (256U)
|
||||
/** BL CODE size should be aligned to 256 bytes */
|
||||
#define LSF_BL_CODE_SIZE_ALIGNMENT (256U)
|
||||
/** LSF DATA size should be aligned to 256 bytes */
|
||||
#define LSF_DATA_SIZE_ALIGNMENT (256U)
|
||||
/** LSF CODE size should be aligned to 256 bytes */
|
||||
#define LSF_CODE_SIZE_ALIGNMENT (256U)
|
||||
|
||||
/** UCODE surface should be aligned to 4k PAGE_SIZE */
|
||||
#define LSF_UCODE_DATA_ALIGNMENT 4096U
|
||||
|
||||
/**
|
||||
* Maximum WPR Header size
|
||||
*/
|
||||
#define LSF_WPR_HEADERS_TOTAL_SIZE_MAX \
|
||||
(ALIGN_UP(((u32)sizeof(struct lsf_wpr_header) * FALCON_ID_END), \
|
||||
LSF_WPR_HEADER_ALIGNMENT))
|
||||
#define LSF_LSB_HEADER_TOTAL_SIZE_MAX (\
|
||||
ALIGN_UP(sizeof(struct lsf_lsb_header), LSF_LSB_HEADER_ALIGNMENT))
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
/* Maximum SUB WPR header size */
|
||||
#define LSF_SUB_WPR_HEADERS_TOTAL_SIZE_MAX (ALIGN_UP( \
|
||||
(sizeof(struct lsf_shared_sub_wpr_header) * \
|
||||
LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX), \
|
||||
LSF_SUB_WPR_HEADER_ALIGNMENT))
|
||||
|
||||
/* MMU expects sub_wpr sizes in units of 4K */
|
||||
#define SUB_WPR_SIZE_ALIGNMENT (4096U)
|
||||
|
||||
/* 4KB granularity corresponds to a 12-bit shift */
|
||||
#define SHIFT_4KB (12U)
|
||||
|
||||
/* shared sub_wpr use case IDs */
|
||||
enum {
|
||||
LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_FRTS_VBIOS_TABLES = 1,
|
||||
LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA = 2
|
||||
};
|
||||
|
||||
#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX \
|
||||
LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_PLAYREADY_SHARED_DATA
|
||||
|
||||
#define LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_INVALID (0xFFFFFFFFU)
|
||||
|
||||
#define MAX_SUPPORTED_SHARED_SUB_WPR_USE_CASES \
|
||||
LSF_SHARED_DATA_SUB_WPR_USE_CASE_ID_MAX
|
||||
|
||||
/* Static sizes of shared subWPRs */
|
||||
/* Minimum granularity supported is 4K */
|
||||
/* 1MB in 4K */
|
||||
#define LSF_SHARED_DATA_SUB_WPR_FRTS_VBIOS_TABLES_SIZE_IN_4K (0x100U)
|
||||
/* 4K */
|
||||
#define LSF_SHARED_DATA_SUB_WPR_PLAYREADY_SHARED_DATA_SIZE_IN_4K (0x1U)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_BLOB_CONSTRUCT
|
||||
*/
|
||||
/** @{*/
|
||||
|
||||
/**
|
||||
* Image status updated by ACR HS ucode to know the LS
|
||||
* Falcon ucode status.
|
||||
*/
|
||||
/** IMAGE copied from NON-WPR to WPR BLOB*/
|
||||
#define LSF_IMAGE_STATUS_COPY (1U)
|
||||
/** LS Falcon ucode verification failed*/
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED (2U)
|
||||
/** LS Falcon data verification failed*/
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED (3U)
|
||||
/** Both ucode and data validation passed */
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_DONE (4U)
|
||||
/**
|
||||
* LS Falcons such as FECS and GPCCS do not have signatures for binaries in
|
||||
* a debug environment (fmodel).
|
||||
*/
|
||||
#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED (5U)
|
||||
/** LS Falcon validation passed & ready for bootstrap */
|
||||
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (6U)
|
||||
|
||||
/**
|
||||
* Light Secure WPR Header
|
||||
* Defines state allowing Light Secure Falcon bootstrapping.
|
||||
*/
|
||||
struct lsf_wpr_header {
|
||||
/**
|
||||
* LS Falcon ID
|
||||
* FALCON_ID_FECS - 2
|
||||
* FALCON_ID_GPCCS - 3
|
||||
*/
|
||||
u32 falcon_id;
|
||||
/**
|
||||
* LS Falcon LSB header offset from non-WPR base, below equation used
|
||||
* to get LSB header offset for each managed LS falcon.
|
||||
* Offset = Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT +
|
||||
* ((#LSF_UCODE_DATA_ALIGNMENT + #LSF_BL_DATA_ALIGNMENT) *
|
||||
* LS Falcon index)
|
||||
*
|
||||
*/
|
||||
u32 lsb_offset;
|
||||
/**
|
||||
* LS Falcon bootstrap owner, which performs bootstrapping of
|
||||
* supported LS Falcon from ACR HS ucode. Below are the bootstrapping
|
||||
* supporting Falcon owners.
|
||||
* + Falcon #FALCON_ID_PMU
|
||||
*
|
||||
* On GV11B, bootstrap_owner set to #FALCON_ID_PMU as ACR HS ucode
|
||||
* runs on PMU Engine Falcon.
|
||||
*
|
||||
*/
|
||||
u32 bootstrap_owner;
|
||||
/**
|
||||
* Skip bootstrapping by ACR HS ucode,
|
||||
* 1 - skip LS Falcon bootstrapping by ACR HS ucode.
|
||||
* 0 - LS Falcon bootstrapping is done by ACR HS ucode.
|
||||
*
|
||||
* On GV11B, always set 0.
|
||||
*/
|
||||
u32 lazy_bootstrap;
|
||||
/** LS ucode bin version*/
|
||||
u32 bin_version;
|
||||
/**
|
||||
* Bootstrapping status updated by ACR HS ucode to know the LS
|
||||
* Falcon ucode status.
|
||||
*/
|
||||
u32 status;
|
||||
};
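/*
 * Minimal illustration (not the driver's blob-construct code, which lives
 * in acr_blob_construct.c): how a WPR header entry for FECS could be
 * filled before it is copied into the non-WPR blob. The lsb_offset
 * argument, the bin version value and the helper name are placeholders,
 * and the FALCON_ID_* defines are assumed to be in scope here.
 */
static inline void lsf_wpr_header_fill_example(struct lsf_wpr_header *hdr,
					       u32 lsb_offset)
{
	hdr->falcon_id = FALCON_ID_FECS;
	/* LSB header offset from the non-WPR base for this falcon */
	hdr->lsb_offset = lsb_offset;
	/* on GV11B the ACR HS ucode runs on the PMU falcon */
	hdr->bootstrap_owner = FALCON_ID_PMU;
	/* 0: let the ACR HS ucode bootstrap this falcon */
	hdr->lazy_bootstrap = 0U;
	/* placeholder ucode bin version */
	hdr->bin_version = 1U;
	/* overwritten by the ACR HS ucode as bootstrapping progresses */
	hdr->status = LSF_IMAGE_STATUS_COPY;
}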
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_BLOB_CONSTRUCT
|
||||
*/
|
||||
/** @{*/
|
||||
/**
|
||||
* Size in entries of the ucode descriptor's dependency map.
|
||||
*/
|
||||
#define LSF_FALCON_DEPMAP_SIZE (11U)
|
||||
|
||||
/**
|
||||
* Code/data signature details of LS falcon
|
||||
*/
|
||||
struct lsf_ucode_desc {
|
||||
/** ucode's production signature */
|
||||
u8 prd_keys[2][16];
|
||||
/** ucode's debug signature */
|
||||
u8 dbg_keys[2][16];
|
||||
/**
|
||||
* production signature present status,
|
||||
* 1 - production signature present
|
||||
* 0 - production signature not present
|
||||
*/
|
||||
u32 b_prd_present;
|
||||
/**
|
||||
* debug signature present
|
||||
* 1 - debug signature present
|
||||
* 0 - debug signature not present
|
||||
*/
|
||||
u32 b_dbg_present;
|
||||
/**
|
||||
* LS Falcon ID
|
||||
* FALCON_ID_FECS - 2
|
||||
* FALCON_ID_GPCCS - 3
|
||||
*/
|
||||
u32 falcon_id;
|
||||
/**
|
||||
* include version in signature calculation if supported
|
||||
* 1 - supported
|
||||
* 0 - not supported
|
||||
*/
|
||||
u32 bsupports_versioning;
|
||||
/** version to include it in signature calculation if supported */
|
||||
u32 version;
|
||||
/** valid dependency map data to consider from dep_map array member */
|
||||
u32 dep_map_count;
|
||||
/**
|
||||
* packed dependency map used to compute the DM hashes on the code and
|
||||
* data.
|
||||
*/
|
||||
u8 dep_map[LSF_FALCON_DEPMAP_SIZE * 2 * 4];
|
||||
/** Message used to derive key */
|
||||
u8 kdf[16];
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_BLOB_CONSTRUCT
|
||||
*/
|
||||
/** @{*/
|
||||
|
||||
/**
|
||||
* Light Secure Bootstrap Header
|
||||
* Defines state allowing Light Secure Falcon bootstrapping.
|
||||
*/
|
||||
/** Load BL at 0th IMEM offset */
|
||||
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0U
|
||||
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE BIT32(0)
|
||||
/** This falcon requires a ctx before issuing DMAs. */
|
||||
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0U
|
||||
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE BIT32(2)
|
||||
/** Use priv loading method instead of bootloader/DMAs */
|
||||
#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE BIT32(3)
|
||||
#define NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE (0U)
|
||||
struct lsf_lsb_header {
|
||||
/** Code/data signature details of each LS falcon */
|
||||
struct lsf_ucode_desc signature;
|
||||
/**
|
||||
* Offset from non-WPR base where UCODE is located,
|
||||
* Offset = Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT +
|
||||
* #LSF_UCODE_DATA_ALIGNMENT + ( #LSF_BL_DATA_ALIGNMENT *
|
||||
* LS Falcon index)
|
||||
*/
|
||||
u32 ucode_off;
|
||||
/**
|
||||
* Size of LS Falcon ucode, required to perform signature verification
|
||||
* of LS Falcon ucode by ACR HS.
|
||||
*/
|
||||
u32 ucode_size;
|
||||
/**
|
||||
* Size of LS Falcon ucode data, required to perform signature
|
||||
* verification of LS Falcon ucode data by ACR HS.
|
||||
*/
|
||||
u32 data_size;
|
||||
/**
|
||||
* Size of bootloader that needs to be loaded by bootstrap owner.
|
||||
*
|
||||
* On GV11B, respective LS Falcon BL code size should not exceed
|
||||
* below mentioned size.
|
||||
* FALCON_ID_FECS IMEM size - 32k
|
||||
* FALCON_ID_GPCCS IMEM size - 16k
|
||||
*/
|
||||
u32 bl_code_size;
|
||||
/** BL starting virtual address. Needed for tagging. */
|
||||
u32 bl_imem_off;
|
||||
/**
|
||||
* Offset from non-WPR base holding the BL data
|
||||
* Offset = (Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT +
|
||||
* #LSF_UCODE_DATA_ALIGNMENT + #LSF_BL_DATA_ALIGNMENT) *
|
||||
* #LS Falcon index
|
||||
*/
|
||||
u32 bl_data_off;
|
||||
/**
|
||||
* Size of BL data, BL data will be copied to LS Falcon DMEM of
|
||||
* bl data size
|
||||
*
|
||||
* On GV11B, respective LS Falcon BL data size should not exceed
|
||||
* below mentioned size.
|
||||
* FALCON_ID_FECS DMEM size - 8k
|
||||
* FALCON_ID_GPCCS DMEM size - 5k
|
||||
*/
|
||||
u32 bl_data_size;
|
||||
/**
|
||||
* Offset from non-WPR base address where UCODE Application code is
|
||||
* located.
|
||||
*/
|
||||
u32 app_code_off;
|
||||
/**
|
||||
* Size of UCODE Application code.
|
||||
*
|
||||
* On GV11B, FECS/GPCCS LS Falcon app code size should not exceed
|
||||
* below mentioned size.
|
||||
* FALCON_ID_FECS IMEM size - 32k
|
||||
* FALCON_ID_GPCCS IMEM size - 16k
|
||||
*/
|
||||
u32 app_code_size;
|
||||
/**
|
||||
* Offset from non-WPR base address where UCODE Application data
|
||||
* is located
|
||||
*/
|
||||
u32 app_data_off;
|
||||
/**
|
||||
* Size of UCODE Application data.
|
||||
*
|
||||
* On GV11B, respective LS Falcon app data size should not exceed
|
||||
* below mentioned size.
|
||||
* FALCON_ID_FECS DMEM size - 8k
|
||||
* FALCON_ID_GPCCS DMEM size - 5k
|
||||
*/
|
||||
u32 app_data_size;
|
||||
/**
|
||||
* NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0 - Load BL at 0th IMEM offset
|
||||
* NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX - This falcon requires a ctx
|
||||
* before issuing DMAs.
|
||||
* NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD - Use priv loading method
|
||||
* instead of bootloader/DMAs
|
||||
*/
|
||||
u32 flags;
|
||||
};
|
||||
|
||||
#define FLCN_SIG_SIZE (4U)
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_BLOB_CONSTRUCT
|
||||
*/
|
||||
/** @{*/
|
||||
/**
|
||||
* Structure used by the boot-loader to load the rest of the LS Falcon code.
|
||||
*
|
||||
* This has to be filled by the GPU driver and copied into WPR region offset
|
||||
* holding the BL data.
|
||||
*/
|
||||
struct flcn_bl_dmem_desc {
|
||||
/** Should be always first element */
|
||||
u32 reserved[FLCN_SIG_SIZE];
|
||||
/**
|
||||
* Signature should follow reserved 16B signature for secure code.
|
||||
* 0s if no secure code
|
||||
*/
|
||||
u32 signature[FLCN_SIG_SIZE];
|
||||
/**
|
||||
* Type of memory-aperture DMA index used by the bootloader
|
||||
* while loading code/data.
|
||||
*/
|
||||
u32 ctx_dma;
|
||||
/**
|
||||
* 256B aligned physical sysmem(iGPU)/FB(dGPU) address where code
|
||||
* is located.
|
||||
*/
|
||||
struct falc_u64 code_dma_base;
|
||||
/**
|
||||
* Offset from code_dma_base where the non-secure code is located.
|
||||
* The offset must be a multiple of 256 to help performance.
|
||||
*/
|
||||
u32 non_sec_code_off;
|
||||
/**
|
||||
* The size of the non-secure code part.
|
||||
*
|
||||
* On GV11B, FECS/GPCCS LS Falcon non-secure + secure code size
|
||||
* should not exceed below mentioned size.
|
||||
* FALCON_ID_FECS IMEM size - 32k
|
||||
* FALCON_ID_GPCCS IMEM size - 16k
|
||||
*/
|
||||
u32 non_sec_code_size;
|
||||
/**
|
||||
* Offset from code_dma_base where the secure code is located.
|
||||
* The offset must be a multiple of 256 to help performance.
|
||||
*/
|
||||
u32 sec_code_off;
|
||||
/**
|
||||
* The size of the secure code part.
|
||||
*
|
||||
* On GV11B, FECS/GPCCS LS Falcon non-secure + secure code size
|
||||
* should not exceed below mentioned size.
|
||||
* FALCON_ID_FECS IMEM size - 32k
|
||||
* FALCON_ID_GPCCS IMEM size - 16k
|
||||
*/
|
||||
u32 sec_code_size;
|
||||
/**
|
||||
* Code entry point which will be invoked by BL after code is
|
||||
* loaded.
|
||||
*/
|
||||
u32 code_entry_point;
|
||||
/**
|
||||
* 256B aligned Physical sysmem(iGPU)/FB(dGPU) Address where data
|
||||
* is located.
|
||||
*/
|
||||
struct falc_u64 data_dma_base;
|
||||
/**
|
||||
* Size of data block. Should be multiple of 256B.
|
||||
*
|
||||
* On GV11B, respective LS Falcon data size should not exceed
|
||||
* below mentioned size.
|
||||
* FALCON_ID_FECS DMEM size - 8k
|
||||
* FALCON_ID_GPCCS DMEM size - 5k
|
||||
*/
|
||||
u32 data_size;
|
||||
/** Number of arguments to be passed to the target firmware being loaded. */
|
||||
u32 argc;
|
||||
/**
|
||||
* Arguments to be passed to the target firmware
|
||||
* being loaded.
|
||||
*/
|
||||
u32 argv;
|
||||
};
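/*
 * Minimal illustration only: deriving a BL DMEM descriptor from an
 * already-filled LSB header. The field choices (app code/data sizes, zero
 * entry point, caller-supplied ctx_dma index), the helper name and the
 * availability of memset() are assumptions for this sketch; code_dma_base
 * and data_dma_base (struct falc_u64) are left for the real blob-construct
 * code to fill.
 */
static inline void flcn_bl_dmem_desc_fill_example(struct flcn_bl_dmem_desc *bl,
						const struct lsf_lsb_header *lsb,
						u32 dma_idx)
{
	/* reserved[] and signature[] must stay zero for non-secure code */
	(void) memset(bl, 0, sizeof(*bl));

	/* memory-aperture DMA index used while loading code/data */
	bl->ctx_dma = dma_idx;
	bl->non_sec_code_off = lsb->app_code_off;
	bl->non_sec_code_size = lsb->app_code_size;
	/* the sketch assumes the entry point is at offset 0 */
	bl->code_entry_point = 0U;
	bl->data_size = lsb->app_data_size;
}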
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup NVGPURM_ACR_HS_LOAD_BOOTSTRAP ACR HS ucode load & bootstrap
|
||||
*
|
||||
* ACR HS ucode load & bootstrap interfaces:
|
||||
* The ACR unit reads the ACR HS ucode from the filesystem based on the
|
||||
* chip-id. The ucode image is then patched with the structs below at the
|
||||
* offset given by the hdr_offset member of struct #acr_fw_header, and is
|
||||
* loaded onto the PMU/SEC2/GSP engine Falcon that bootstraps it. The ACR
|
||||
* HS ucode self-authenticates using the H/W based HS authentication
|
||||
* methodology and, once authenticated, starts executing on that falcon.
|
||||
*/
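/*
 * In this release the flow maps onto the nvgpu_acr hooks installed by the
 * per-chip sw_init functions: prepare_ucode_blob() builds the LS blob,
 * patch_wpr_info_to_ucode() patches the descriptor at the offset taken
 * from the acr_fw_header, and bootstrap_hs_acr() loads and starts the HS
 * ucode on the owning falcon.
 */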
|
||||
|
||||
/**
|
||||
* @ingroup NVGPURM_ACR_HS_LOAD_BOOTSTRAP
|
||||
*/
|
||||
/** @{*/
|
||||
|
||||
/**
|
||||
* Supporting maximum of 2 regions.
|
||||
* This is needed to pre-allocate space in DMEM
|
||||
*/
|
||||
#define NVGPU_FLCN_ACR_MAX_REGIONS (2U)
|
||||
/** Reserve 512 bytes for bootstrap owner LS ucode data */
|
||||
#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200U)
|
||||
|
||||
/**
|
||||
* The descriptor used by ACR HS ucode to figure out properties of individual
|
||||
* WPR regions.
|
||||
*
|
||||
* On GV11B, the members of this struct are set to 0x0 by default so
|
||||
* that the ACR HS ucode fetches the WPR1 details from H/W.
|
||||
*/
|
||||
struct flcn_acr_region_prop {
|
||||
/** Starting address of WPR region */
|
||||
u32 start_addr;
|
||||
/** Ending address of WPR region */
|
||||
u32 end_addr;
|
||||
/** The ID of the WPR region. 0 for WPR1 and 1 for WPR2 */
|
||||
u32 region_id;
|
||||
/** Read mask associated with this region */
|
||||
u32 read_mask;
|
||||
/** Write mask associated with this region */
|
||||
u32 write_mask;
|
||||
/** Bit map of all clients currently using this region */
|
||||
u32 client_mask;
|
||||
/**
|
||||
* sysmem(iGPU)/FB(dGPU) location from where contents need to
|
||||
* be copied to startAddress
|
||||
*/
|
||||
u32 shadowmMem_startaddress;
|
||||
};
|
||||
|
||||
/**
|
||||
* The descriptor used by ACR HS ucode to figure out supporting regions &
|
||||
* its properties.
|
||||
*/
|
||||
struct flcn_acr_regions {
|
||||
/**
|
||||
* Number of regions used by NVGPU from the total number of ACR
|
||||
* regions supported in chip.
|
||||
*
|
||||
* On GV11B, 1 ACR region is supported and no_regions should always be greater
|
||||
* than 0.
|
||||
*/
|
||||
u32 no_regions;
|
||||
/** Region properties */
|
||||
struct flcn_acr_region_prop region_props[NVGPU_FLCN_ACR_MAX_REGIONS];
|
||||
};
|
||||
|
||||
#define DMEM_WORD_SIZE 4U
|
||||
#define DUMMY_SPACE_SIZE 4U
|
||||
/**
|
||||
* The descriptor used by ACR HS ucode to figure out the
|
||||
* WPR & non-WPR blob details.
|
||||
*/
|
||||
struct flcn_acr_desc {
|
||||
/*
|
||||
* The bootstrap owner needs to switch into LS mode when bootstrapping
|
||||
* other LS Falcons is completed. It needs to have its own actual
|
||||
* DMEM image copied into DMEM as part of LS setup. If ACR desc is
|
||||
* at location 0, it will definitely get overwritten causing data
|
||||
* corruption. Hence need to reserve 0x200 bytes to give room for
|
||||
* any loading data.
|
||||
* NOTE: This has to be the first member always.
|
||||
*/
|
||||
union {
|
||||
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/DMEM_WORD_SIZE)];
|
||||
} ucode_reserved_space;
|
||||
/** Signature of ACR ucode. */
|
||||
u32 signatures[FLCN_SIG_SIZE];
|
||||
/**
|
||||
* WPR Region ID holding the WPR header and its details
|
||||
*
|
||||
* on GV11B, wpr_region_id set to 0x0 by default to indicate
|
||||
* to ACR HS ucode to fetch WPR region details from H/W &
|
||||
* updating WPR start_addr, end_addr, read_mask & write_mask
|
||||
* of struct #flcn_acr_region_prop.
|
||||
*/
|
||||
u32 wpr_region_id;
|
||||
/** Offset from the non-WPR base holding the wpr header */
|
||||
u32 wpr_offset;
|
||||
/** usable memory ranges, on GV11B it is not set */
|
||||
u32 mmu_mem_range;
|
||||
/**
|
||||
* WPR Region descriptors to provide info about WPR.
|
||||
* on GV11B, no_regions set to 1 & region properties value to 0x0
|
||||
* to indicate to ACR HS ucode to fetch WPR region details from H/W.
|
||||
*/
|
||||
struct flcn_acr_regions regions;
|
||||
/**
|
||||
* stores the size of the ucode blob.
|
||||
*
|
||||
* On GV11B, size is calculated at runtime & aligned to 256 bytes.
|
||||
* Size varies based on the number of LS falcons supported.
|
||||
*/
|
||||
u32 nonwpr_ucode_blob_size;
|
||||
/**
|
||||
* stores sysmem(iGPU)/FB's(dGPU) non-WPR start address where
|
||||
* kernel stores ucode blob
|
||||
*/
|
||||
u64 nonwpr_ucode_blob_start;
|
||||
/** dummy space, not used by iGPU */
|
||||
u32 dummy[DUMMY_SPACE_SIZE];
|
||||
};
|
||||
|
||||
struct flcn2_acr_desc {
|
||||
/**
|
||||
* WPR Region ID holding the WPR header and its details
|
||||
*
|
||||
* on GPUID_NEXT, wpr_region_id set to 0x0 by default to indicate
|
||||
* to ACR HS ucode to fetch WPR region details from H/W &
|
||||
* updating WPR start_addr, end_addr, read_mask & write_mask
|
||||
* of struct #flcn_acr_region_prop.
|
||||
*/
|
||||
u32 wpr_region_id;
|
||||
/** Offset from the non-WPR base holding the wpr header */
|
||||
u32 wpr_offset;
|
||||
/**
|
||||
* WPR Region descriptors to provide info about WPR.
|
||||
* on GPUID_NEXT, no_regions set to 1 & region properties value to 0x0
|
||||
* to indicate to ACR HS ucode to fetch WPR region details from H/W.
|
||||
*/
|
||||
struct flcn_acr_regions regions;
|
||||
/**
|
||||
* stores the size of the ucode blob.
|
||||
*
|
||||
* On GPUID_NEXT, size is calculated at runtime & aligned to 256 bytes.
|
||||
* Size varies based on the number of LS falcons supported.
|
||||
*/
|
||||
u32 nonwpr_ucode_blob_size;
|
||||
/**
|
||||
* stores sysmem(iGPU)/FB's(dGPU) non-WPR start address where
|
||||
* kernel stores ucode blob
|
||||
*/
|
||||
u64 nonwpr_ucode_blob_start;
|
||||
|
||||
u64 ls_pmu_desc;
|
||||
};
|
||||
|
||||
/** @} */
|
||||
|
||||
#endif /* NVGPU_ACR_INTERFACE_H */
|
||||
122
drivers/gpu/nvgpu/common/cbc/cbc.c
Normal file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
* CBC
|
||||
*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/cbc.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/comptags.h>
|
||||
|
||||
void nvgpu_cbc_remove_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_cbc *cbc = g->cbc;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (cbc == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) {
|
||||
nvgpu_dma_free(g, &cbc->compbit_store.mem);
|
||||
(void) memset(&cbc->compbit_store, 0,
|
||||
sizeof(struct compbit_store_desc));
|
||||
}
|
||||
gk20a_comptag_allocator_destroy(g, &cbc->comp_tags);
|
||||
|
||||
nvgpu_kfree(g, cbc);
|
||||
g->cbc = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is triggered during finalize_poweron multiple times.
|
||||
* It should not return early when cbc is already allocated, because
|
||||
* cbc.init(), which re-writes HW registers that are reset during suspend,
|
||||
* should be allowed to execute each time.
|
||||
*/
|
||||
int nvgpu_cbc_init_support(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_cbc *cbc = g->cbc;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (cbc == NULL) {
|
||||
cbc = nvgpu_kzalloc(g, sizeof(*cbc));
|
||||
if (cbc == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
g->cbc = cbc;
|
||||
|
||||
if (g->ops.cbc.alloc_comptags != NULL) {
|
||||
err = g->ops.cbc.alloc_comptags(g, g->cbc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to allocate comptags");
|
||||
nvgpu_kfree(g, cbc);
|
||||
g->cbc = NULL;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (g->ops.cbc.init != NULL) {
|
||||
g->ops.cbc.init(g, g->cbc);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
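/*
 * Allocate the compbit backing store if it is not already valid. When
 * vidmem_alloc is set (dGPU only) the store comes from the vidmem
 * bootstrap allocator, otherwise from physically addressed sysmem.
 */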
int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size,
|
||||
bool vidmem_alloc)
|
||||
{
|
||||
struct nvgpu_cbc *cbc = g->cbc;
|
||||
|
||||
if (nvgpu_mem_is_valid(&cbc->compbit_store.mem) != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (vidmem_alloc == true) {
|
||||
/*
|
||||
* Backing store MUST be physically contiguous and allocated in
|
||||
* one chunk
|
||||
* Vidmem allocation API does not support FORCE_CONTIGUOUS like
|
||||
* flag to allocate contiguous memory
|
||||
* But this allocation will happen in vidmem bootstrap allocator
|
||||
* which always allocates contiguous memory
|
||||
*/
|
||||
return nvgpu_dma_alloc_vid(g,
|
||||
compbit_backing_size,
|
||||
&cbc->compbit_store.mem);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
return nvgpu_dma_alloc_flags_sys(g,
|
||||
NVGPU_DMA_PHYSICALLY_ADDRESSED,
|
||||
compbit_backing_size,
|
||||
&cbc->compbit_store.mem);
|
||||
}
|
||||
}
|
||||
71
drivers/gpu/nvgpu/common/ce/ce.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/device.h>
|
||||
#include <nvgpu/ce.h>
|
||||
#include <nvgpu/power_features/cg.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/mc.h>
|
||||
|
||||
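/*
 * One-time CE hardware setup at poweron: program the PCE to LCE mapping,
 * reset the LCE devices, enable clock gating, program prod values and
 * turn on CE interrupts at the unit and MC levels.
 */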
int nvgpu_ce_init_support(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (g->ops.ce.set_pce2lce_mapping != NULL) {
|
||||
g->ops.ce.set_pce2lce_mapping(g);
|
||||
}
|
||||
|
||||
err = nvgpu_mc_reset_devtype(g, NVGPU_DEVTYPE_LCE);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "NVGPU_DEVTYPE_LCE reset failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_cg_slcg_ce2_load_enable(g);
|
||||
|
||||
nvgpu_cg_blcg_ce_load_enable(g);
|
||||
|
||||
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
|
||||
nvgpu_cg_elcg_ce_load_enable(g);
|
||||
#endif
|
||||
|
||||
if (g->ops.ce.init_prod_values != NULL) {
|
||||
g->ops.ce.init_prod_values(g);
|
||||
}
|
||||
|
||||
if (g->ops.ce.init_hw != NULL) {
|
||||
g->ops.ce.init_hw(g);
|
||||
}
|
||||
|
||||
if (g->ops.ce.intr_enable != NULL) {
|
||||
g->ops.ce.intr_enable(g, true);
|
||||
}
|
||||
|
||||
/* Enable interrupts at MC level */
|
||||
nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_CE, NVGPU_CIC_INTR_ENABLE);
|
||||
nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_CE, NVGPU_CIC_INTR_ENABLE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
621
drivers/gpu/nvgpu/common/ce/ce_app.c
Normal file
@@ -0,0 +1,621 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/os_sched.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/watchdog.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/fence.h>
|
||||
#include <nvgpu/ce_app.h>
|
||||
#include <nvgpu/power_features/cg.h>
|
||||
|
||||
#include "common/ce/ce_priv.h"
|
||||
|
||||
static inline u32 nvgpu_ce_get_valid_launch_flags(struct gk20a *g,
|
||||
u32 launch_flags)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
/*
|
||||
* there is no local memory available,
|
||||
* don't allow local memory related CE flags
|
||||
*/
|
||||
if (g->mm.vidmem.size == 0ULL) {
|
||||
launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
|
||||
NVGPU_CE_DST_LOCATION_LOCAL_FB);
|
||||
}
|
||||
#endif
|
||||
return launch_flags;
|
||||
}
|
||||
|
||||
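/*
 * Submit one CE copy or memset job on the CE context identified by
 * ce_ctx_id and, on success, optionally return the post fence of the
 * submitted command buffer.
 */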
int nvgpu_ce_execute_ops(struct gk20a *g,
|
||||
u32 ce_ctx_id,
|
||||
u64 src_paddr,
|
||||
u64 dst_paddr,
|
||||
u64 size,
|
||||
u32 payload,
|
||||
u32 launch_flags,
|
||||
u32 request_operation,
|
||||
u32 submit_flags,
|
||||
struct nvgpu_fence_type **fence_out)
|
||||
{
|
||||
int ret = -EPERM;
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save;
|
||||
bool found = false;
|
||||
u32 *cmd_buf_cpu_va;
|
||||
u64 cmd_buf_gpu_va = 0UL;
|
||||
u32 method_size;
|
||||
u32 cmd_buf_read_offset;
|
||||
u32 dma_copy_class;
|
||||
struct nvgpu_gpfifo_entry gpfifo;
|
||||
struct nvgpu_channel_fence fence = {0U, 0U};
|
||||
struct nvgpu_fence_type *ce_cmd_buf_fence_out = NULL;
|
||||
|
||||
if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* This shouldn't happen */
|
||||
if (size == 0ULL) {
|
||||
ret = -EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (request_operation != NVGPU_CE_PHYS_MODE_TRANSFER &&
|
||||
request_operation != NVGPU_CE_MEMSET) {
|
||||
ret = -EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (src_paddr > NVGPU_CE_MAX_ADDRESS) {
|
||||
ret = -EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (dst_paddr > NVGPU_CE_MAX_ADDRESS) {
|
||||
ret = -EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
|
||||
&ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) {
|
||||
if (ce_ctx->ctx_id == ce_ctx_id) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
|
||||
if (!found) {
|
||||
ret = -EINVAL;
|
||||
goto end;
|
||||
}
|
||||
|
||||
if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
|
||||
ret = -ENODEV;
|
||||
goto end;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
|
||||
|
||||
ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
|
||||
|
||||
cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
|
||||
(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT /
|
||||
U32(sizeof(u32))));
|
||||
|
||||
cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
|
||||
|
||||
if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] != NULL) {
|
||||
struct nvgpu_fence_type **prev_post_fence =
|
||||
&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
|
||||
|
||||
ret = nvgpu_fence_wait(g, *prev_post_fence,
|
||||
nvgpu_get_poll_timeout(g));
|
||||
|
||||
nvgpu_fence_put(*prev_post_fence);
|
||||
*prev_post_fence = NULL;
|
||||
if (ret != 0) {
|
||||
goto noop;
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va +
|
||||
(u64)(cmd_buf_read_offset * sizeof(u32)));
|
||||
|
||||
dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
|
||||
method_size = nvgpu_ce_prepare_submit(src_paddr,
|
||||
dst_paddr,
|
||||
size,
|
||||
&cmd_buf_cpu_va[cmd_buf_read_offset],
|
||||
payload,
|
||||
nvgpu_ce_get_valid_launch_flags(g, launch_flags),
|
||||
request_operation,
|
||||
dma_copy_class);
|
||||
nvgpu_assert(method_size <= NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT);
|
||||
|
||||
if (method_size != 0U) {
|
||||
/* store the element into gpfifo */
|
||||
g->ops.pbdma.format_gpfifo_entry(g, &gpfifo,
|
||||
cmd_buf_gpu_va, method_size);
|
||||
|
||||
/*
|
||||
* take always the postfence as it is needed for protecting the
|
||||
* ce context
|
||||
*/
|
||||
submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
|
||||
|
||||
nvgpu_smp_wmb();
|
||||
|
||||
ret = nvgpu_submit_channel_gpfifo_kernel(ce_ctx->ch, &gpfifo,
|
||||
1, submit_flags, &fence, &ce_cmd_buf_fence_out);
|
||||
|
||||
if (ret == 0) {
|
||||
ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
|
||||
ce_cmd_buf_fence_out;
|
||||
if (fence_out != NULL) {
|
||||
nvgpu_fence_get(ce_cmd_buf_fence_out);
|
||||
*fence_out = ce_cmd_buf_fence_out;
|
||||
}
|
||||
|
||||
/* Next available command buffer queue Index */
|
||||
++ce_ctx->cmd_buf_read_queue_offset;
|
||||
}
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
noop:
|
||||
nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
|
||||
end:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* static CE app api */
|
||||
static void nvgpu_ce_put_fences(struct nvgpu_ce_gpu_ctx *ce_ctx)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0U; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
|
||||
struct nvgpu_fence_type **fence = &ce_ctx->postfences[i];
|
||||
|
||||
if (*fence != NULL) {
|
||||
nvgpu_fence_put(*fence);
|
||||
}
|
||||
*fence = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* caller must hold ce_app->app_mutex */
|
||||
static void nvgpu_ce_delete_gpu_context_locked(struct nvgpu_ce_gpu_ctx *ce_ctx)
|
||||
{
|
||||
struct nvgpu_list_node *list = &ce_ctx->list;
|
||||
|
||||
ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_DELETED;
|
||||
ce_ctx->tsg->abortable = true;
|
||||
|
||||
nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
|
||||
|
||||
if (nvgpu_mem_is_valid(&ce_ctx->cmd_buf_mem)) {
|
||||
nvgpu_ce_put_fences(ce_ctx);
|
||||
nvgpu_dma_unmap_free(ce_ctx->vm, &ce_ctx->cmd_buf_mem);
|
||||
}
|
||||
|
||||
/*
|
||||
* free the channel
|
||||
* nvgpu_channel_close() will also unbind the channel from TSG
|
||||
*/
|
||||
nvgpu_channel_close(ce_ctx->ch);
|
||||
nvgpu_ref_put(&ce_ctx->tsg->refcount, nvgpu_tsg_release);
|
||||
|
||||
/* housekeeping on app */
|
||||
if ((list->prev != NULL) && (list->next != NULL)) {
|
||||
nvgpu_list_del(list);
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
|
||||
nvgpu_mutex_destroy(&ce_ctx->gpu_ctx_mutex);
|
||||
|
||||
nvgpu_kfree(ce_ctx->g, ce_ctx);
|
||||
}
|
||||
|
||||
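/*
 * Emit the copy engine method stream for a single rectangle (either a
 * copy or a memset) into cmd_buf_cpu_va and return the number of u32
 * words written.
 */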
static u32 nvgpu_prepare_ce_op(u32 *cmd_buf_cpu_va,
|
||||
u64 src_paddr, u64 dst_paddr,
|
||||
u32 width, u32 height, u32 payload,
|
||||
bool mode_transfer, u32 launch_flags)
|
||||
{
|
||||
u32 launch = 0U;
|
||||
u32 methodSize = 0U;
|
||||
|
||||
if (mode_transfer) {
|
||||
/* setup the source */
|
||||
cmd_buf_cpu_va[methodSize++] = 0x20028100;
|
||||
cmd_buf_cpu_va[methodSize++] = (u64_hi32(src_paddr) &
|
||||
NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
|
||||
cmd_buf_cpu_va[methodSize++] = (u64_lo32(src_paddr) &
|
||||
NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
|
||||
|
||||
cmd_buf_cpu_va[methodSize++] = 0x20018098;
|
||||
if ((launch_flags &
|
||||
NVGPU_CE_SRC_LOCATION_LOCAL_FB) != 0U) {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000000;
|
||||
} else if ((launch_flags &
|
||||
NVGPU_CE_SRC_LOCATION_NONCOHERENT_SYSMEM) != 0U) {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000002;
|
||||
} else {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000001;
|
||||
}
|
||||
|
||||
launch |= 0x00001000U;
|
||||
} else { /* memset */
|
||||
/* Remap from component A on 1 byte wide pixels */
|
||||
cmd_buf_cpu_va[methodSize++] = 0x200181c2;
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000004;
|
||||
|
||||
cmd_buf_cpu_va[methodSize++] = 0x200181c0;
|
||||
cmd_buf_cpu_va[methodSize++] = payload;
|
||||
|
||||
launch |= 0x00000400U;
|
||||
}
|
||||
|
||||
/* setup the destination/output */
|
||||
cmd_buf_cpu_va[methodSize++] = 0x20068102;
|
||||
cmd_buf_cpu_va[methodSize++] = (u64_hi32(dst_paddr) &
|
||||
NVGPU_CE_UPPER_ADDRESS_OFFSET_MASK);
|
||||
cmd_buf_cpu_va[methodSize++] = (u64_lo32(dst_paddr) &
|
||||
NVGPU_CE_LOWER_ADDRESS_OFFSET_MASK);
|
||||
/* Pitch in/out */
|
||||
cmd_buf_cpu_va[methodSize++] = width;
|
||||
cmd_buf_cpu_va[methodSize++] = width;
|
||||
/* width and line count */
|
||||
cmd_buf_cpu_va[methodSize++] = width;
|
||||
cmd_buf_cpu_va[methodSize++] = height;
|
||||
|
||||
cmd_buf_cpu_va[methodSize++] = 0x20018099;
|
||||
if ((launch_flags & NVGPU_CE_DST_LOCATION_LOCAL_FB) != 0U) {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000000;
|
||||
} else if ((launch_flags &
|
||||
NVGPU_CE_DST_LOCATION_NONCOHERENT_SYSMEM) != 0U) {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000002;
|
||||
} else {
|
||||
cmd_buf_cpu_va[methodSize++] = 0x00000001;
|
||||
}
|
||||
|
||||
launch |= 0x00002005U;
|
||||
|
||||
if ((launch_flags &
|
||||
NVGPU_CE_SRC_MEMORY_LAYOUT_BLOCKLINEAR) != 0U) {
|
||||
launch |= 0x00000000U;
|
||||
} else {
|
||||
launch |= 0x00000080U;
|
||||
}
|
||||
|
||||
if ((launch_flags &
|
||||
NVGPU_CE_DST_MEMORY_LAYOUT_BLOCKLINEAR) != 0U) {
|
||||
launch |= 0x00000000U;
|
||||
} else {
|
||||
launch |= 0x00000100U;
|
||||
}
|
||||
|
||||
cmd_buf_cpu_va[methodSize++] = 0x200180c0;
|
||||
cmd_buf_cpu_va[methodSize++] = launch;
|
||||
|
||||
return methodSize;
|
||||
}
|
||||
|
||||
u32 nvgpu_ce_prepare_submit(u64 src_paddr,
|
||||
u64 dst_paddr,
|
||||
u64 size,
|
||||
u32 *cmd_buf_cpu_va,
|
||||
u32 payload,
|
||||
u32 launch_flags,
|
||||
u32 request_operation,
|
||||
u32 dma_copy_class)
|
||||
{
|
||||
u32 methodSize = 0;
|
||||
u64 low, hi;
|
||||
bool mode_transfer = (request_operation == NVGPU_CE_PHYS_MODE_TRANSFER);
|
||||
|
||||
/* set the channel object */
|
||||
cmd_buf_cpu_va[methodSize++] = 0x20018000;
|
||||
cmd_buf_cpu_va[methodSize++] = dma_copy_class;
|
||||
|
||||
/*
|
||||
* The CE can work with 2D rectangles of at most 0xffffffff or 4G-1
|
||||
* pixels per line. Exactly 2G is a more round number, so we'll use
|
||||
* that as the base unit to clear large amounts of memory. If the
|
||||
* requested size is not a multiple of 2G, we'll do one clear first to
|
||||
* deal with the low bits, followed by another in units of 2G.
|
||||
*
|
||||
* We'll use 1 byte per pixel to do byte-aligned sets/copies. The
|
||||
* maximum number of lines is also 4G-1, so (4G-1) * 2 GB is enough for
|
||||
* whole vidmem.
|
||||
*/
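/*
 * Example: size = 5 GiB (0x140000000) gives low = 0x40000000 (one 1 GiB
 * line) and hi = 2 (two more 2 GiB-wide lines).
 */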
|
||||
|
||||
/* Lower 2GB */
|
||||
low = size & 0x7fffffffULL;
|
||||
/* Over 2GB */
|
||||
hi = size >> 31U;
|
||||
|
||||
/*
|
||||
* Unable to fit this in one submit, but no device should have this
|
||||
* much memory anyway.
|
||||
*/
|
||||
if (hi > 0xffffffffULL) {
|
||||
/* zero size means error */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (low != 0U) {
|
||||
/* do the low bytes in one long line */
|
||||
methodSize += nvgpu_prepare_ce_op(&cmd_buf_cpu_va[methodSize],
|
||||
src_paddr, dst_paddr,
|
||||
nvgpu_safe_cast_u64_to_u32(low), 1,
|
||||
payload, mode_transfer, launch_flags);
|
||||
}
|
||||
if (hi != 0U) {
|
||||
/* do the high bytes in many 2G lines */
|
||||
methodSize += nvgpu_prepare_ce_op(&cmd_buf_cpu_va[methodSize],
|
||||
src_paddr + low, dst_paddr + low,
|
||||
0x80000000ULL, nvgpu_safe_cast_u64_to_u32(hi),
|
||||
payload, mode_transfer, launch_flags);
|
||||
}
|
||||
|
||||
return methodSize;
|
||||
}
|
||||
|
||||
/* global CE app related apis */
|
||||
int nvgpu_ce_app_init_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
|
||||
if (unlikely(ce_app == NULL)) {
|
||||
ce_app = nvgpu_kzalloc(g, sizeof(*ce_app));
|
||||
if (ce_app == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
g->ce_app = ce_app;
|
||||
}
|
||||
|
||||
if (ce_app->initialised) {
|
||||
/* assume this happen during poweron/poweroff GPU sequence */
|
||||
ce_app->app_state = NVGPU_CE_ACTIVE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn, "ce: init");
|
||||
|
||||
nvgpu_mutex_init(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_init_list_node(&ce_app->allocated_contexts);
|
||||
ce_app->ctx_count = 0;
|
||||
ce_app->next_ctx_id = 0;
|
||||
ce_app->initialised = true;
|
||||
ce_app->app_state = NVGPU_CE_ACTIVE;
|
||||
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_cde_ctx, "ce: init finished");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ce_app_destroy(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save;
|
||||
|
||||
if (ce_app == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ce_app->initialised == false) {
|
||||
goto free;
|
||||
}
|
||||
|
||||
ce_app->app_state = NVGPU_CE_SUSPEND;
|
||||
ce_app->initialised = false;
|
||||
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
|
||||
&ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) {
|
||||
nvgpu_ce_delete_gpu_context_locked(ce_ctx);
|
||||
}
|
||||
|
||||
nvgpu_init_list_node(&ce_app->allocated_contexts);
|
||||
ce_app->ctx_count = 0;
|
||||
ce_app->next_ctx_id = 0;
|
||||
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_mutex_destroy(&ce_app->app_mutex);
|
||||
free:
|
||||
nvgpu_kfree(g, ce_app);
|
||||
g->ce_app = NULL;
|
||||
}
|
||||
|
||||
void nvgpu_ce_app_suspend(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
|
||||
if (ce_app == NULL || !ce_app->initialised) {
|
||||
return;
|
||||
}
|
||||
|
||||
ce_app->app_state = NVGPU_CE_SUSPEND;
|
||||
}
|
||||
|
||||
/* CE app utility functions */
|
||||
u32 nvgpu_ce_app_create_context(struct gk20a *g,
|
||||
u32 runlist_id,
|
||||
int timeslice,
|
||||
int runlist_level)
|
||||
{
|
||||
struct nvgpu_ce_gpu_ctx *ce_ctx;
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
struct nvgpu_setup_bind_args setup_bind_args;
|
||||
u32 ctx_id = NVGPU_CE_INVAL_CTX_ID;
|
||||
int err = 0;
|
||||
|
||||
if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
|
||||
return ctx_id;
|
||||
}
|
||||
|
||||
ce_ctx = nvgpu_kzalloc(g, sizeof(*ce_ctx));
|
||||
if (ce_ctx == NULL) {
|
||||
return ctx_id;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&ce_ctx->gpu_ctx_mutex);
|
||||
|
||||
ce_ctx->g = g;
|
||||
ce_ctx->cmd_buf_read_queue_offset = 0;
|
||||
ce_ctx->vm = g->mm.ce.vm;
|
||||
|
||||
/* allocate a tsg if needed */
|
||||
ce_ctx->tsg = nvgpu_tsg_open(g, nvgpu_current_pid(g));
|
||||
if (ce_ctx->tsg == NULL) {
|
||||
nvgpu_err(g, "ce: gk20a tsg not available");
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* this TSG should never be aborted */
|
||||
ce_ctx->tsg->abortable = false;
|
||||
|
||||
/* always kernel client needs privileged channel */
|
||||
ce_ctx->ch = nvgpu_channel_open_new(g, runlist_id, true,
|
||||
nvgpu_current_pid(g), nvgpu_current_tid(g));
|
||||
if (ce_ctx->ch == NULL) {
|
||||
nvgpu_err(g, "ce: gk20a channel not available");
|
||||
goto end;
|
||||
}
|
||||
|
||||
nvgpu_channel_wdt_disable(ce_ctx->ch->wdt);
|
||||
|
||||
/* bind the channel to the vm */
|
||||
err = g->ops.mm.vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ce: could not bind vm");
|
||||
goto end;
|
||||
}
|
||||
|
||||
err = nvgpu_tsg_bind_channel(ce_ctx->tsg, ce_ctx->ch);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ce: unable to bind to tsg");
|
||||
goto end;
|
||||
}
|
||||
|
||||
setup_bind_args.num_gpfifo_entries = 1024;
|
||||
setup_bind_args.num_inflight_jobs = 0;
|
||||
setup_bind_args.flags = 0;
|
||||
err = nvgpu_channel_setup_bind(ce_ctx->ch, &setup_bind_args);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ce: unable to setup and bind channel");
|
||||
goto end;
|
||||
}
|
||||
|
||||
/* allocate command buffer from sysmem */
|
||||
err = nvgpu_dma_alloc_map_sys(ce_ctx->vm,
|
||||
NVGPU_CE_MAX_INFLIGHT_JOBS *
|
||||
NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_SUBMIT,
|
||||
&ce_ctx->cmd_buf_mem);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"ce: alloc command buffer failed");
|
||||
goto end;
|
||||
}
|
||||
|
||||
(void) memset(ce_ctx->cmd_buf_mem.cpu_va, 0x00,
|
||||
ce_ctx->cmd_buf_mem.size);
|
||||
|
||||
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
||||
/* -1 means default channel timeslice value */
|
||||
if (timeslice != -1) {
|
||||
err = g->ops.tsg.set_timeslice(ce_ctx->tsg, timeslice);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ce: set timesliced failed for CE context");
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
/* -1 means default channel runlist level */
|
||||
if (runlist_level != -1) {
|
||||
err = nvgpu_tsg_set_interleave(ce_ctx->tsg, runlist_level);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "ce: set runlist interleave failed");
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
ctx_id = ce_ctx->ctx_id = ce_app->next_ctx_id;
|
||||
nvgpu_list_add(&ce_ctx->list, &ce_app->allocated_contexts);
|
||||
++ce_app->next_ctx_id;
|
||||
++ce_app->ctx_count;
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
|
||||
ce_ctx->gpu_ctx_state = NVGPU_CE_GPU_CTX_ALLOCATED;
|
||||
|
||||
end:
|
||||
if (ctx_id == NVGPU_CE_INVAL_CTX_ID) {
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
nvgpu_ce_delete_gpu_context_locked(ce_ctx);
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
}
|
||||
return ctx_id;
|
||||
|
||||
}
|
||||
|
||||
void nvgpu_ce_app_delete_context(struct gk20a *g,
|
||||
u32 ce_ctx_id)
|
||||
{
|
||||
struct nvgpu_ce_app *ce_app = g->ce_app;
|
||||
struct nvgpu_ce_gpu_ctx *ce_ctx, *ce_ctx_save;
|
||||
|
||||
if (ce_app == NULL || !ce_app->initialised ||
|
||||
ce_app->app_state != NVGPU_CE_ACTIVE) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&ce_app->app_mutex);
|
||||
|
||||
nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
|
||||
&ce_app->allocated_contexts, nvgpu_ce_gpu_ctx, list) {
|
||||
if (ce_ctx->ctx_id == ce_ctx_id) {
|
||||
nvgpu_ce_delete_gpu_context_locked(ce_ctx);
|
||||
--ce_app->ctx_count;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ce_app->app_mutex);
|
||||
}
|
||||
82
drivers/gpu/nvgpu/common/ce/ce_priv.h
Normal file
82
drivers/gpu/nvgpu/common/ce/ce_priv.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef NVGPU_CE_PRIV_H
|
||||
#define NVGPU_CE_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/lock.h>
|
||||
|
||||
struct gk20a;
|
||||
|
||||
/* ce context db */
|
||||
struct nvgpu_ce_gpu_ctx {
|
||||
struct gk20a *g;
|
||||
u32 ctx_id;
|
||||
struct nvgpu_mutex gpu_ctx_mutex;
|
||||
int gpu_ctx_state;
|
||||
|
||||
/* tsg related data */
|
||||
struct nvgpu_tsg *tsg;
|
||||
|
||||
/* channel related data */
|
||||
struct nvgpu_channel *ch;
|
||||
struct vm_gk20a *vm;
|
||||
|
||||
/* cmd buf mem_desc */
|
||||
struct nvgpu_mem cmd_buf_mem;
|
||||
struct nvgpu_fence_type *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
|
||||
|
||||
struct nvgpu_list_node list;
|
||||
|
||||
u32 cmd_buf_read_queue_offset;
|
||||
};
|
||||
|
||||
/* global ce app db */
|
||||
struct nvgpu_ce_app {
|
||||
bool initialised;
|
||||
struct nvgpu_mutex app_mutex;
|
||||
int app_state;
|
||||
|
||||
struct nvgpu_list_node allocated_contexts;
|
||||
u32 ctx_count;
|
||||
u32 next_ctx_id;
|
||||
};
|
||||
|
||||
static inline struct nvgpu_ce_gpu_ctx *
|
||||
nvgpu_ce_gpu_ctx_from_list(struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_ce_gpu_ctx *)
|
||||
((uintptr_t)node - offsetof(struct nvgpu_ce_gpu_ctx, list));
|
||||
};
|
||||
|
||||
u32 nvgpu_ce_prepare_submit(u64 src_paddr,
|
||||
u64 dst_paddr,
|
||||
u64 size,
|
||||
u32 *cmd_buf_cpu_va,
|
||||
u32 payload,
|
||||
u32 launch_flags,
|
||||
u32 request_operation,
|
||||
u32 dma_copy_class);
|
||||
|
||||
#endif /*NVGPU_CE_PRIV_H*/
|
||||
91
drivers/gpu/nvgpu/common/cic/ce_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/ce_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
|
||||
u32 inst, u32 err_id, u32 intr_info)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_CE) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_ce_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.ce_info.header.sub_unit_id = inst;
|
||||
err_pkt.err_desc = err_desc;
|
||||
/* sub_err_type can be decoded using intr_info by referring
|
||||
* to the interrupt status register definition corresponding
|
||||
* to the error that is being reported.
|
||||
*/
|
||||
err_pkt.err_info.ce_info.header.sub_err_type = intr_info;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.ce_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report CE error: "
|
||||
"inst=%u err_id=%u intr_info=%u",
|
||||
inst, err_id, intr_info);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type)
|
||||
{
|
||||
nvgpu_report_ce_err(g, hw_unit, 0U, err_index, sub_err_type);
|
||||
}
|
||||
161
drivers/gpu/nvgpu/common/cic/cic.c
Normal file
161
drivers/gpu/nvgpu/common/cic/cic.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
int nvgpu_cic_init_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_cic *cic;
|
||||
int err = 0;
|
||||
|
||||
if (g->cic != NULL) {
|
||||
cic_dbg(g, "CIC unit already initialized");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cic = nvgpu_kzalloc(g, sizeof(*cic));
|
||||
if (cic == NULL) {
|
||||
nvgpu_err(g, "Failed to allocate memory "
|
||||
"for struct nvgpu_cic");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (g->ops.cic.init != NULL) {
|
||||
err = g->ops.cic.init(g, cic);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "CIC chip specific "
|
||||
"initialization failed.");
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
cic->err_lut = NULL;
|
||||
cic->num_hw_modules = 0;
|
||||
}
|
||||
|
||||
g->cic = cic;
|
||||
cic_dbg(g, "CIC unit initialization done.");
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
if (cic != NULL) {
|
||||
nvgpu_kfree(g, cic);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_cic_deinit_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_cic *cic;
|
||||
|
||||
cic = g->cic;
|
||||
|
||||
if (cic == NULL) {
|
||||
cic_dbg(g, "CIC unit already deinitialized");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cic->err_lut = NULL;
|
||||
cic->num_hw_modules = 0;
|
||||
|
||||
nvgpu_kfree(g, cic);
|
||||
g->cic = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_cic_check_hw_unit_id(struct gk20a *g, u32 hw_unit_id)
|
||||
{
|
||||
if (g->cic == NULL) {
|
||||
nvgpu_err(g, "CIC is not initialized");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (g->cic->num_hw_modules == 0U) {
|
||||
cic_dbg(g, "LUT not initialized.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (hw_unit_id >= g->cic->num_hw_modules) {
|
||||
cic_dbg(g, "Invalid input HW unit ID.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_cic_check_err_id(struct gk20a *g, u32 hw_unit_id,
|
||||
u32 err_id)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if ((g->cic == NULL) || (g->cic->err_lut == NULL)) {
|
||||
cic_dbg(g, "CIC/LUT not initialized.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_check_hw_unit_id(g, hw_unit_id);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
if (err_id >= g->cic->err_lut[hw_unit_id].num_errs) {
|
||||
err = -EINVAL;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_cic_get_err_desc(struct gk20a *g, u32 hw_unit_id,
|
||||
u32 err_id, struct nvgpu_err_desc **err_desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* if (g->cic != NULL) and (g->cic->err_lut != NULL) check
|
||||
* can be skipped here as it checked as part of
|
||||
* nvgpu_cic_check_err_id() called below.
|
||||
*/
|
||||
|
||||
err = nvgpu_cic_check_err_id(g, hw_unit_id, err_id);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
*err_desc = &(g->cic->err_lut[hw_unit_id].errs[err_id]);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_cic_get_num_hw_modules(struct gk20a *g)
|
||||
{
|
||||
if (g->cic == NULL) {
|
||||
nvgpu_err(g, "CIC is not initialized");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return g->cic->num_hw_modules;
|
||||
}
|
||||
251
drivers/gpu/nvgpu/common/cic/cic_intr.c
Normal file
251
drivers/gpu/nvgpu/common/cic/cic_intr.c
Normal file
@@ -0,0 +1,251 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/trace.h>
|
||||
|
||||
int nvgpu_cic_wait_for_stall_interrupts(struct gk20a *g, u32 timeout)
|
||||
{
|
||||
/* wait until all stalling irqs are handled */
|
||||
return NVGPU_COND_WAIT(&g->mc.sw_irq_stall_last_handled_cond,
|
||||
nvgpu_atomic_read(&g->mc.sw_irq_stall_pending) == 0,
|
||||
timeout);
|
||||
}
|
||||
|
||||
int nvgpu_cic_wait_for_nonstall_interrupts(struct gk20a *g, u32 timeout)
|
||||
{
|
||||
/* wait until all non-stalling irqs are handled */
|
||||
return NVGPU_COND_WAIT(&g->mc.sw_irq_nonstall_last_handled_cond,
|
||||
nvgpu_atomic_read(&g->mc.sw_irq_nonstall_pending) == 0,
|
||||
timeout);
|
||||
}
|
||||
|
||||
void nvgpu_cic_wait_for_deferred_interrupts(struct gk20a *g)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nvgpu_cic_wait_for_stall_interrupts(g, 0U);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "wait for stall interrupts failed %d", ret);
|
||||
}
|
||||
|
||||
ret = nvgpu_cic_wait_for_nonstall_interrupts(g, 0U);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "wait for nonstall interrupts failed %d", ret);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_mask(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (g->ops.mc.intr_mask != NULL) {
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_mask(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
void nvgpu_cic_log_pending_intrs(struct gk20a *g)
|
||||
{
|
||||
if (g->ops.mc.log_pending_intrs != NULL) {
|
||||
g->ops.mc.log_pending_intrs(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_enable(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (g->ops.mc.intr_enable != NULL) {
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_enable(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void nvgpu_cic_intr_stall_unit_config(struct gk20a *g, u32 unit, bool enable)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_stall_unit_config(g, unit, enable);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_unit_config(struct gk20a *g, u32 unit, bool enable)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_nonstall_unit_config(g, unit, enable);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_stall_pause(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_stall_pause(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_stall_resume(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_stall_resume(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_pause(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_nonstall_pause(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_resume(struct gk20a *g)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
nvgpu_spinlock_irqsave(&g->mc.intr_lock, flags);
|
||||
g->ops.mc.intr_nonstall_resume(g);
|
||||
nvgpu_spinunlock_irqrestore(&g->mc.intr_lock, flags);
|
||||
}
|
||||
|
||||
static void nvgpu_cic_intr_nonstall_work(struct gk20a *g, u32 work_ops)
|
||||
{
|
||||
bool semaphore_wakeup, post_events;
|
||||
|
||||
semaphore_wakeup =
|
||||
(((work_ops & NVGPU_CIC_NONSTALL_OPS_WAKEUP_SEMAPHORE) != 0U) ?
|
||||
true : false);
|
||||
post_events = (((work_ops & NVGPU_CIC_NONSTALL_OPS_POST_EVENTS) != 0U) ?
|
||||
true : false);
|
||||
|
||||
if (semaphore_wakeup) {
|
||||
g->ops.semaphore_wakeup(g, post_events);
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvgpu_cic_intr_nonstall_isr(struct gk20a *g)
|
||||
{
|
||||
u32 non_stall_intr_val = 0U;
|
||||
|
||||
if (nvgpu_is_powered_off(g)) {
|
||||
return NVGPU_CIC_INTR_UNMASK;
|
||||
}
|
||||
|
||||
/* not from gpu when sharing irq with others */
|
||||
non_stall_intr_val = g->ops.mc.intr_nonstall(g);
|
||||
if (non_stall_intr_val == 0U) {
|
||||
return NVGPU_CIC_INTR_NONE;
|
||||
}
|
||||
|
||||
nvgpu_cic_intr_nonstall_pause(g);
|
||||
if (g->sw_quiesce_pending) {
|
||||
return NVGPU_CIC_INTR_QUIESCE_PENDING;
|
||||
}
|
||||
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 1);
|
||||
return NVGPU_CIC_INTR_HANDLE;
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_nonstall_handle(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
u32 nonstall_ops = 0;
|
||||
|
||||
nonstall_ops = g->ops.mc.isr_nonstall(g);
|
||||
if (nonstall_ops != 0U) {
|
||||
nvgpu_cic_intr_nonstall_work(g, nonstall_ops);
|
||||
}
|
||||
|
||||
/* sync handled irq counter before re-enabling interrupts */
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_nonstall_pending, 0);
|
||||
|
||||
nvgpu_cic_intr_nonstall_resume(g);
|
||||
|
||||
err = nvgpu_cond_broadcast(&g->mc.sw_irq_nonstall_last_handled_cond);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err);
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvgpu_cic_intr_stall_isr(struct gk20a *g)
|
||||
{
|
||||
u32 mc_intr_0 = 0U;
|
||||
|
||||
nvgpu_trace_intr_stall_start(g);
|
||||
|
||||
if (nvgpu_is_powered_off(g)) {
|
||||
return NVGPU_CIC_INTR_UNMASK;
|
||||
}
|
||||
|
||||
/* not from gpu when sharing irq with others */
|
||||
mc_intr_0 = g->ops.mc.intr_stall(g);
|
||||
if (mc_intr_0 == 0U) {
|
||||
return NVGPU_CIC_INTR_NONE;
|
||||
}
|
||||
|
||||
nvgpu_cic_intr_stall_pause(g);
|
||||
|
||||
if (g->sw_quiesce_pending) {
|
||||
return NVGPU_CIC_INTR_QUIESCE_PENDING;
|
||||
}
|
||||
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 1);
|
||||
|
||||
nvgpu_trace_intr_stall_done(g);
|
||||
|
||||
return NVGPU_CIC_INTR_HANDLE;
|
||||
}
|
||||
|
||||
void nvgpu_cic_intr_stall_handle(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_trace_intr_thread_stall_start(g);
|
||||
|
||||
g->ops.mc.isr_stall(g);
|
||||
|
||||
nvgpu_trace_intr_thread_stall_done(g);
|
||||
|
||||
/* sync handled irq counter before re-enabling interrupts */
|
||||
nvgpu_atomic_set(&g->mc.sw_irq_stall_pending, 0);
|
||||
nvgpu_cic_intr_stall_resume(g);
|
||||
|
||||
err = nvgpu_cond_broadcast(&g->mc.sw_irq_stall_last_handled_cond);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_cond_broadcast failed err=%d", err);
|
||||
}
|
||||
}
|
||||
291
drivers/gpu/nvgpu/common/cic/cic_priv.h
Normal file
291
drivers/gpu/nvgpu/common/cic/cic_priv.h
Normal file
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CIC_PRIV_H
|
||||
#define CIC_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_err_hw_module;
|
||||
struct nvgpu_err_msg;
|
||||
struct gpu_err_header;
|
||||
|
||||
/*
|
||||
* @file
|
||||
*
|
||||
* Declare CIC's private structure to store error-policy LUT and
|
||||
* other data and ops needed during error reporting.
|
||||
*/
|
||||
|
||||
#define ERR_INJECT_TEST_PATTERN 0xA5
|
||||
|
||||
/*
|
||||
* This struct contains members related to error-policy look-up table,
|
||||
* number of units reporting errors.
|
||||
*/
|
||||
struct nvgpu_cic {
|
||||
/** Pointer for error look-up table. */
|
||||
struct nvgpu_err_hw_module *err_lut;
|
||||
|
||||
/** Total number of GPU HW modules considered in CIC. */
|
||||
u32 num_hw_modules;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Inject ECC error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param inst [in] - Instance ID.
|
||||
*
|
||||
* - Sets values for error address and error count.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 inst);
|
||||
|
||||
/**
|
||||
* @brief Inject HOST error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject GR error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for GR exception and SM machine check error information.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject CE error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ce_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject CE error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param err_code [in] - Error code.
|
||||
*
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 err_code);
|
||||
|
||||
/**
|
||||
* @brief Inject PMU error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for error info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Inject CTXSW error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param inst [in] - Instance ID.
|
||||
*
|
||||
* - Sets values for error info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 inst);
|
||||
|
||||
/**
|
||||
* @brief Inject MMU error.
|
||||
*
|
||||
* @param g [in] - The GPU driver struct.
|
||||
* @param hw_unit [in] - Index of HW unit.
|
||||
* @param err_index [in] - Error index.
|
||||
* @param sub_err_type [in] - Sub error type.
|
||||
*
|
||||
* - Sets values for mmu page fault info.
|
||||
* - Invokes error reporting API with the required set of inputs.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message header.
|
||||
*
|
||||
* @param header [in] - Error message header.
|
||||
*
|
||||
* This is used to initialize error message header.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_err_msg_header(struct gpu_err_header *header);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is common
|
||||
* for all HW units.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for HOST unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to HOST unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize ECC error message.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to ECC errors.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for PRI unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to PRI unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for CE unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to CE unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for PMU unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to PMU unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for GR unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to GR unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for CTXSW.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to CTXSW.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
/**
|
||||
* @brief Initialize error message for MMU unit.
|
||||
*
|
||||
* @param msg [in] - Error message.
|
||||
*
|
||||
* This is used to initialize error message that is specific to MMU unit.
|
||||
*
|
||||
* @return None
|
||||
*/
|
||||
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg);
|
||||
|
||||
#endif /* CIC_PRIV_H */
|
||||
97
drivers/gpu/nvgpu/common/cic/ctxsw_cic.c
Normal file
97
drivers/gpu/nvgpu/common/cic/ctxsw_cic.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
|
||||
void *data)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
u32 inst = 0;
|
||||
struct ctxsw_err_info *err_info = (struct ctxsw_err_info *)data;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_FECS) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for"
|
||||
" err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_ctxsw_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.ctxsw_info.header.sub_unit_id = inst;
|
||||
err_pkt.err_info.ctxsw_info.curr_ctx = err_info->curr_ctx;
|
||||
err_pkt.err_info.ctxsw_info.chid = err_info->chid;
|
||||
err_pkt.err_info.ctxsw_info.ctxsw_status0 = err_info->ctxsw_status0;
|
||||
err_pkt.err_info.ctxsw_info.ctxsw_status1 = err_info->ctxsw_status1;
|
||||
err_pkt.err_info.ctxsw_info.mailbox_value = err_info->mailbox_value;
|
||||
err_pkt.err_desc = err_desc;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.ctxsw_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report CTXSW error: "
|
||||
"err_id=%u, mailbox_val=%u",
|
||||
err_id, err_info->mailbox_value);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_ctxsw_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 inst)
|
||||
{
|
||||
struct ctxsw_err_info err_info;
|
||||
|
||||
(void)memset(&err_info, ERR_INJECT_TEST_PATTERN, sizeof(err_info));
|
||||
|
||||
nvgpu_report_ctxsw_err(g, hw_unit, err_index, (void *)&err_info);
|
||||
}
|
||||
87
drivers/gpu/nvgpu/common/cic/ecc_cic.c
Normal file
87
drivers/gpu/nvgpu/common/cic/ecc_cic.c
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_id, u64 err_addr, u64 err_count)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_ecc_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.ecc_info.header.sub_unit_id = inst;
|
||||
err_pkt.err_info.ecc_info.header.address = err_addr;
|
||||
err_pkt.err_info.ecc_info.err_cnt = err_count;
|
||||
err_pkt.err_desc = err_desc;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.ecc_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report ECC error: hw_unit=%u, inst=%u, "
|
||||
"err_id=%u, err_addr=%llu, err_count=%llu",
|
||||
hw_unit, inst, err_id, err_addr, err_count);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_ecc_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
|
||||
u32 inst)
|
||||
{
|
||||
u64 err_addr, err_count;
|
||||
|
||||
err_addr = (u64)ERR_INJECT_TEST_PATTERN;
|
||||
err_count = (u64)ERR_INJECT_TEST_PATTERN;
|
||||
|
||||
nvgpu_report_ecc_err(g, hw_unit, inst, err_index, err_addr, err_count);
|
||||
}
|
||||
169
drivers/gpu/nvgpu/common/cic/gr_cic.c
Normal file
169
drivers/gpu/nvgpu/common/cic/gr_cic.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
static void nvpgu_report_fill_err_info(u32 hw_unit,
|
||||
struct nvgpu_err_msg *err_pkt, struct gr_err_info *err_info)
|
||||
{
|
||||
if (hw_unit == NVGPU_ERR_MODULE_SM) {
|
||||
struct gr_sm_mcerr_info *info = err_info->sm_mcerr_info;
|
||||
|
||||
err_pkt->err_info.sm_info.warp_esr_pc =
|
||||
info->hww_warp_esr_pc;
|
||||
err_pkt->err_info.sm_info.warp_esr_status =
|
||||
info->hww_warp_esr_status;
|
||||
err_pkt->err_info.sm_info.curr_ctx =
|
||||
info->curr_ctx;
|
||||
err_pkt->err_info.sm_info.chid =
|
||||
info->chid;
|
||||
err_pkt->err_info.sm_info.tsgid =
|
||||
info->tsgid;
|
||||
err_pkt->err_info.sm_info.gpc =
|
||||
info->gpc;
|
||||
err_pkt->err_info.sm_info.tpc =
|
||||
info->tpc;
|
||||
err_pkt->err_info.sm_info.sm =
|
||||
info->sm;
|
||||
} else {
|
||||
struct gr_exception_info *info = err_info->exception_info;
|
||||
|
||||
err_pkt->err_info.gr_info.curr_ctx = info->curr_ctx;
|
||||
err_pkt->err_info.gr_info.chid = info->chid;
|
||||
err_pkt->err_info.gr_info.tsgid = info->tsgid;
|
||||
err_pkt->err_info.gr_info.status = info->status;
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_id, struct gr_err_info *err_info, u32 sub_err_type)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if ((hw_unit != NVGPU_ERR_MODULE_SM) &&
|
||||
(hw_unit != NVGPU_ERR_MODULE_PGRAPH)) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_gr_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_desc = err_desc;
|
||||
err_pkt.err_info.gr_info.header.sub_err_type = sub_err_type;
|
||||
err_pkt.err_info.gr_info.header.sub_unit_id = inst;
|
||||
nvpgu_report_fill_err_info(hw_unit, &err_pkt, err_info);
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(sizeof(err_pkt.err_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
if (hw_unit == NVGPU_ERR_MODULE_SM) {
|
||||
nvgpu_err(g, "Failed to report SM exception"
|
||||
"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
|
||||
err_pkt.err_info.sm_info.gpc,
|
||||
err_pkt.err_info.sm_info.tpc,
|
||||
err_pkt.err_info.sm_info.sm,
|
||||
err_pkt.err_info.sm_info.warp_esr_status);
|
||||
}
|
||||
if (hw_unit == NVGPU_ERR_MODULE_PGRAPH) {
|
||||
nvgpu_err(g, "Failed to report PGRAPH"
|
||||
"exception: inst=%u, err_id=%u, "
|
||||
"status=%u", inst, err_id,
|
||||
err_pkt.err_info.gr_info.status);
|
||||
}
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_gr_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type)
|
||||
{
|
||||
struct gr_err_info err_info;
|
||||
struct gr_exception_info gr_error_info;
|
||||
struct gr_sm_mcerr_info sm_error_info;
|
||||
int err = 0;
|
||||
u32 inst = 0U;
|
||||
|
||||
/*
|
||||
* Fill fixed test pattern data for the error message
|
||||
* payload.
|
||||
*/
|
||||
(void)memset(&gr_error_info, ERR_INJECT_TEST_PATTERN, sizeof(gr_error_info));
|
||||
(void)memset(&sm_error_info, ERR_INJECT_TEST_PATTERN, sizeof(sm_error_info));
|
||||
|
||||
switch (hw_unit) {
|
||||
case NVGPU_ERR_MODULE_PGRAPH:
|
||||
{
|
||||
err_info.exception_info = &gr_error_info;
|
||||
}
|
||||
break;
|
||||
|
||||
case NVGPU_ERR_MODULE_SM:
|
||||
{
|
||||
err_info.sm_mcerr_info = &sm_error_info;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
nvgpu_err(g, "unsupported hw_unit(%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (err != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_report_gr_err(g, hw_unit, inst, err_index,
|
||||
&err_info, sub_err_type);
|
||||
}
|
||||
91
drivers/gpu/nvgpu/common/cic/host_cic.c
Normal file
91
drivers/gpu/nvgpu/common/cic/host_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
|
||||
u32 inst, u32 err_id, u32 intr_info)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_HOST) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_host_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.host_info.header.sub_unit_id = inst;
|
||||
err_pkt.err_desc = err_desc;
|
||||
/* sub_err_type can be decoded using intr_info by referring
|
||||
* to the interrupt status register definition corresponding
|
||||
* to the error that is being reported.
|
||||
*/
|
||||
err_pkt.err_info.host_info.header.sub_err_type = intr_info;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.host_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report HOST error: "
|
||||
"inst=%u, err_id=%u, intr_info=%u",
|
||||
inst, err_id, intr_info);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_host_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 sub_err_type)
|
||||
{
|
||||
nvgpu_report_host_err(g, hw_unit, 0U, err_index, sub_err_type);
|
||||
}
|
||||
131
drivers/gpu/nvgpu/common/cic/mmu_cic.c
Normal file
131
drivers/gpu/nvgpu/common/cic/mmu_cic.c
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
|
||||
struct mmu_fault_info *fault_info, u32 status, u32 sub_err_type)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_HUBMMU) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_mmu_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.mmu_info.header.sub_err_type = sub_err_type;
|
||||
err_pkt.err_info.mmu_info.status = status;
|
||||
/* Copy contents of mmu_fault_info */
|
||||
if (fault_info != NULL) {
|
||||
err_pkt.err_info.mmu_info.info.inst_ptr = fault_info->inst_ptr;
|
||||
err_pkt.err_info.mmu_info.info.inst_aperture
|
||||
= fault_info->inst_aperture;
|
||||
err_pkt.err_info.mmu_info.info.fault_addr
|
||||
= fault_info->fault_addr;
|
||||
err_pkt.err_info.mmu_info.info.fault_addr_aperture
|
||||
= fault_info->fault_addr_aperture;
|
||||
err_pkt.err_info.mmu_info.info.timestamp_lo
|
||||
= fault_info->timestamp_lo;
|
||||
err_pkt.err_info.mmu_info.info.timestamp_hi
|
||||
= fault_info->timestamp_hi;
|
||||
err_pkt.err_info.mmu_info.info.mmu_engine_id
|
||||
= fault_info->mmu_engine_id;
|
||||
err_pkt.err_info.mmu_info.info.gpc_id = fault_info->gpc_id;
|
||||
err_pkt.err_info.mmu_info.info.client_type
|
||||
= fault_info->client_type;
|
||||
err_pkt.err_info.mmu_info.info.client_id
|
||||
= fault_info->client_id;
|
||||
err_pkt.err_info.mmu_info.info.fault_type
|
||||
= fault_info->fault_type;
|
||||
err_pkt.err_info.mmu_info.info.access_type
|
||||
= fault_info->access_type;
|
||||
err_pkt.err_info.mmu_info.info.protected_mode
|
||||
= fault_info->protected_mode;
|
||||
err_pkt.err_info.mmu_info.info.replayable_fault
|
||||
= fault_info->replayable_fault;
|
||||
err_pkt.err_info.mmu_info.info.replay_fault_en
|
||||
= fault_info->replay_fault_en;
|
||||
err_pkt.err_info.mmu_info.info.valid = fault_info->valid;
|
||||
err_pkt.err_info.mmu_info.info.faulted_pbdma =
|
||||
fault_info->faulted_pbdma;
|
||||
err_pkt.err_info.mmu_info.info.faulted_engine =
|
||||
fault_info->faulted_engine;
|
||||
err_pkt.err_info.mmu_info.info.faulted_subid =
|
||||
fault_info->faulted_subid;
|
||||
err_pkt.err_info.mmu_info.info.chid = fault_info->chid;
|
||||
}
|
||||
err_pkt.err_desc = err_desc;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.mmu_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report MMU fault: hw_unit=%u, "
|
||||
"err_id=%u, sub_err_type=%u, status=%u",
|
||||
hw_unit, err_id, sub_err_type, status);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_mmu_swerror(struct gk20a *g, u32 hw_unit, u32 err_index,
|
||||
u32 sub_err_type)
|
||||
{
|
||||
u32 status = 0U;
|
||||
struct mmu_fault_info fault_info;
|
||||
|
||||
(void) memset(&fault_info, ERR_INJECT_TEST_PATTERN, sizeof(fault_info));
|
||||
nvgpu_report_mmu_err(g, hw_unit, err_index,
|
||||
&fault_info, status, sub_err_type);
|
||||
}
|
||||
126
drivers/gpu/nvgpu/common/cic/msg_cic.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/string.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_init_err_msg_header(struct gpu_err_header *header)
|
||||
{
|
||||
header->version.major = (u16)1U;
|
||||
header->version.minor = (u16)0U;
|
||||
header->sub_err_type = 0U;
|
||||
header->sub_unit_id = 0UL;
|
||||
header->address = 0UL;
|
||||
header->timestamp_ns = 0UL;
|
||||
}
|
||||
|
||||
void nvgpu_init_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
(void) memset(msg, 0, sizeof(struct nvgpu_err_msg));
|
||||
msg->hw_unit_id = 0U;
|
||||
msg->is_critical = false;
|
||||
msg->err_id = (u8)0U;
|
||||
msg->err_size = (u8)0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_host_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.host_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_ecc_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ecc_info.header);
|
||||
msg->err_info.ecc_info.err_cnt = 0UL;
|
||||
}
|
||||
|
||||
void nvgpu_init_pri_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.pri_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_ce_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ce_info.header);
|
||||
}
|
||||
|
||||
void nvgpu_init_pmu_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.pmu_err_info.header);
|
||||
msg->err_info.pmu_err_info.status = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_gr_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.gr_info.header);
|
||||
msg->err_info.gr_info.curr_ctx = 0U;
|
||||
msg->err_info.gr_info.chid = 0U;
|
||||
msg->err_info.gr_info.tsgid = 0U;
|
||||
msg->err_info.gr_info.status = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_ctxsw_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.ctxsw_info.header);
|
||||
msg->err_info.ctxsw_info.curr_ctx = 0U;
|
||||
msg->err_info.ctxsw_info.tsgid = 0U;
|
||||
msg->err_info.ctxsw_info.chid = 0U;
|
||||
msg->err_info.ctxsw_info.ctxsw_status0 = 0U;
|
||||
msg->err_info.ctxsw_info.ctxsw_status1 = 0U;
|
||||
msg->err_info.ctxsw_info.mailbox_value = 0U;
|
||||
}
|
||||
|
||||
void nvgpu_init_mmu_err_msg(struct nvgpu_err_msg *msg)
|
||||
{
|
||||
nvgpu_init_err_msg(msg);
|
||||
nvgpu_init_err_msg_header(&msg->err_info.mmu_info.header);
|
||||
msg->err_info.mmu_info.info.inst_ptr = 0UL;
|
||||
msg->err_info.mmu_info.info.inst_aperture = 0U;
|
||||
msg->err_info.mmu_info.info.fault_addr = 0UL;
|
||||
msg->err_info.mmu_info.info.fault_addr_aperture = 0U;
|
||||
msg->err_info.mmu_info.info.timestamp_lo = 0U;
|
||||
msg->err_info.mmu_info.info.timestamp_hi = 0U;
|
||||
msg->err_info.mmu_info.info.mmu_engine_id = 0U;
|
||||
msg->err_info.mmu_info.info.gpc_id = 0U;
|
||||
msg->err_info.mmu_info.info.client_type = 0U;
|
||||
msg->err_info.mmu_info.info.client_id = 0U;
|
||||
msg->err_info.mmu_info.info.fault_type = 0U;
|
||||
msg->err_info.mmu_info.info.access_type = 0U;
|
||||
msg->err_info.mmu_info.info.protected_mode = 0U;
|
||||
msg->err_info.mmu_info.info.replayable_fault = false;
|
||||
msg->err_info.mmu_info.info.replay_fault_en = 0U;
|
||||
msg->err_info.mmu_info.info.valid = false;
|
||||
msg->err_info.mmu_info.info.faulted_pbdma = 0U;
|
||||
msg->err_info.mmu_info.info.faulted_engine = 0U;
|
||||
msg->err_info.mmu_info.info.faulted_subid = 0U;
|
||||
msg->err_info.mmu_info.info.chid = 0U;
|
||||
msg->err_info.mmu_info.status = 0U;
|
||||
}
|
||||
91
drivers/gpu/nvgpu/common/cic/pmu_cic.c
Normal file
@@ -0,0 +1,91 @@
/*
 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/cic.h>

#include "cic_priv.h"

void nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		u32 sub_err_type, u32 status)
{
	int err = 0;
	struct nvgpu_err_desc *err_desc = NULL;
	struct nvgpu_err_msg err_pkt;

	if (g->ops.cic.report_err == NULL) {
		cic_dbg(g, "CIC does not support reporting error "
				"to safety services");
		return;
	}

	if (hw_unit != NVGPU_ERR_MODULE_PMU) {
		nvgpu_err(g, "invalid hw module (%u)", hw_unit);
		err = -EINVAL;
		goto handle_report_failure;
	}

	err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
	if (err != 0) {
		nvgpu_err(g, "Failed to get err_desc for "
				"err_id (%u) for hw module (%u)",
				err_id, hw_unit);
		goto handle_report_failure;
	}

	nvgpu_init_pmu_err_msg(&err_pkt);
	err_pkt.hw_unit_id = hw_unit;
	err_pkt.err_id = err_desc->error_id;
	err_pkt.is_critical = err_desc->is_critical;
	err_pkt.err_info.pmu_err_info.status = status;
	err_pkt.err_info.pmu_err_info.header.sub_err_type = sub_err_type;
	err_pkt.err_desc = err_desc;
	err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
			sizeof(err_pkt.err_info.pmu_err_info));

	if (g->ops.cic.report_err != NULL) {
		err = g->ops.cic.report_err(g, (void *)&err_pkt,
				sizeof(err_pkt), err_desc->is_critical);
		if (err != 0) {
			nvgpu_err(g, "Failed to report PMU error: "
					"err_id=%u, sub_err_type=%u, status=%u",
					err_id, sub_err_type, status);
		}
	}
handle_report_failure:
	if (err != 0) {
		nvgpu_sw_quiesce(g);
	}
}

void nvgpu_inject_pmu_swerror(struct gk20a *g, u32 hw_unit,
		u32 err_index, u32 sub_err_type)
{
	u32 err_info;

	err_info = (u32)ERR_INJECT_TEST_PATTERN;

	nvgpu_report_pmu_err(g, hw_unit, err_index, sub_err_type, err_info);
}
91
drivers/gpu/nvgpu/common/cic/pri_cic.c
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/nvgpu_err_info.h>
|
||||
#include <nvgpu/cic.h>
|
||||
|
||||
#include "cic_priv.h"
|
||||
|
||||
void nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_id, u32 err_addr, u32 err_code)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_err_desc *err_desc = NULL;
|
||||
struct nvgpu_err_msg err_pkt;
|
||||
|
||||
if (g->ops.cic.report_err == NULL) {
|
||||
cic_dbg(g, "CIC does not support reporting error "
|
||||
"to safety services");
|
||||
return;
|
||||
}
|
||||
|
||||
if (hw_unit != NVGPU_ERR_MODULE_PRI) {
|
||||
nvgpu_err(g, "invalid hw module (%u)", hw_unit);
|
||||
err = -EINVAL;
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_get_err_desc(g, hw_unit, err_id, &err_desc);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to get err_desc for "
|
||||
"err_id (%u) for hw module (%u)",
|
||||
err_id, hw_unit);
|
||||
goto handle_report_failure;
|
||||
}
|
||||
|
||||
nvgpu_init_pri_err_msg(&err_pkt);
|
||||
err_pkt.hw_unit_id = hw_unit;
|
||||
err_pkt.err_id = err_desc->error_id;
|
||||
err_pkt.is_critical = err_desc->is_critical;
|
||||
err_pkt.err_info.pri_info.header.sub_unit_id = inst;
|
||||
err_pkt.err_info.pri_info.header.address = (u64) err_addr;
|
||||
err_pkt.err_desc = err_desc;
|
||||
/* sub_err_type can be decoded using err_code by referring
|
||||
* to the FECS pri error codes.
|
||||
*/
|
||||
err_pkt.err_info.pri_info.header.sub_err_type = err_code;
|
||||
err_pkt.err_size = nvgpu_safe_cast_u64_to_u8(
|
||||
sizeof(err_pkt.err_info.pri_info));
|
||||
|
||||
if (g->ops.cic.report_err != NULL) {
|
||||
err = g->ops.cic.report_err(g, (void *)&err_pkt,
|
||||
sizeof(err_pkt), err_desc->is_critical);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Failed to report PRI error: "
|
||||
"inst=%u, err_id=%u, err_code=%u",
|
||||
inst, err_id, err_code);
|
||||
}
|
||||
}
|
||||
handle_report_failure:
|
||||
if (err != 0) {
|
||||
nvgpu_sw_quiesce(g);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_inject_pri_swerror(struct gk20a *g, u32 hw_unit,
|
||||
u32 err_index, u32 err_code)
|
||||
{
|
||||
nvgpu_report_pri_err(g, hw_unit, 0U, err_index, 0U, err_code);
|
||||
}
|
||||
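The four translation units above (mmu_cic.c, msg_cic.c, pmu_cic.c and pri_cic.c) all funnel their error packets through the per-chip g->ops.cic.report_err hook and fall back to nvgpu_sw_quiesce() when reporting fails. As a rough illustration of how such a hook could be implemented and wired up, a minimal sketch follows; the function name, the logging-only body and the exact parameter types are assumptions for illustration only and are not part of this release.

/* Illustrative sketch only -- not part of the nvgpu source tree. */
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_err_info.h>

static int example_cic_report_err(struct gk20a *g, void *err_pkt,
		size_t pkt_size, bool is_critical)
{
	/* The call sites above pass a struct nvgpu_err_msg by address. */
	const struct nvgpu_err_msg *msg = (const struct nvgpu_err_msg *)err_pkt;

	/*
	 * A real backend would forward the packet to safety services;
	 * this sketch only logs the header fields.
	 */
	nvgpu_err(g, "CIC report: hw_unit=%u err_id=%u critical=%d size=%zu",
			msg->hw_unit_id, msg->err_id, is_critical, pkt_size);
	return 0;
}

/* Hypothetical wiring during HAL init: g->ops.cic.report_err = example_cic_report_err; */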
826
drivers/gpu/nvgpu/common/clk_arb/clk_arb.c
Normal file
@@ -0,0 +1,826 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/bitops.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/atomic.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/kref.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/cond.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/clk_arb.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/worker.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/clk/clk.h>
|
||||
#include <nvgpu/pmu/perf.h>
|
||||
#include <nvgpu/pmu/volt.h>
|
||||
#endif
|
||||
#include <nvgpu/boardobjgrp_e255.h>
|
||||
|
||||
int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
|
||||
struct nvgpu_clk_notification_queue *queue,
|
||||
u32 events_number) {
|
||||
queue->clk_q_notifications = nvgpu_kcalloc(g, events_number,
|
||||
sizeof(struct nvgpu_clk_notification));
|
||||
if (queue->clk_q_notifications == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
queue->size = events_number;
|
||||
|
||||
nvgpu_atomic_set(&queue->head, 0);
|
||||
nvgpu_atomic_set(&queue->tail, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_clk_notification_queue_free(struct gk20a *g,
|
||||
struct nvgpu_clk_notification_queue *queue) {
|
||||
if (queue->size > 0U) {
|
||||
nvgpu_kfree(g, queue->clk_q_notifications);
|
||||
queue->size = 0;
|
||||
nvgpu_atomic_set(&queue->head, 0);
|
||||
nvgpu_atomic_set(&queue->tail, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
|
||||
struct nvgpu_clk_notification_queue *queue,
|
||||
u32 alarm_mask) {
|
||||
|
||||
u32 queue_index;
|
||||
u64 timestamp = 0U;
|
||||
|
||||
queue_index = U32(nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
/* get current timestamp */
|
||||
timestamp = (u64) nvgpu_hr_timestamp();
|
||||
#endif
|
||||
|
||||
queue->clk_q_notifications[queue_index].timestamp = timestamp;
|
||||
queue->clk_q_notifications[queue_index].clk_notification = alarm_mask;
|
||||
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
u64 current_mask;
|
||||
u32 refcnt;
|
||||
u32 alarm_mask;
|
||||
u64 new_mask;
|
||||
|
||||
do {
|
||||
current_mask = (u64)nvgpu_atomic64_read(&arb->alarm_mask);
|
||||
/* atomic operations are strong so they do not need masks */
|
||||
|
||||
refcnt = ((u32) (current_mask >> 32)) + 1U;
|
||||
alarm_mask = (u32) (current_mask & ~U32(0)) | alarm;
|
||||
new_mask = ((u64) refcnt << 32) | alarm_mask;
|
||||
|
||||
} while (unlikely(current_mask !=
|
||||
(u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
|
||||
(long int)current_mask, (long int)new_mask)));
|
||||
|
||||
nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct gk20a *g = arb->g;
|
||||
struct nvgpu_clk_vf_table *table;
|
||||
|
||||
u32 i, j;
|
||||
int status = -EINVAL;
|
||||
u16 clk_cur;
|
||||
u32 num_points;
|
||||
|
||||
struct nvgpu_pmu_perf_pstate_clk_info *p0_info;
|
||||
|
||||
table = NV_READ_ONCE(arb->current_vf_table);
|
||||
/* make flag visible when all data has resolved in the tables */
|
||||
nvgpu_smp_rmb();
|
||||
table = (table == &arb->vf_table_pool[0]) ? &arb->vf_table_pool[1] :
|
||||
&arb->vf_table_pool[0];
|
||||
|
||||
/* Get allowed memory ranges */
|
||||
if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK,
|
||||
&arb->gpc2clk_min,
|
||||
&arb->gpc2clk_max) < 0) {
|
||||
nvgpu_err(g, "failed to fetch GPC2CLK range");
|
||||
goto exit_vf_table;
|
||||
}
|
||||
|
||||
if (g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_MCLK,
|
||||
&arb->mclk_min,
|
||||
&arb->mclk_max) < 0) {
|
||||
nvgpu_err(g, "failed to fetch MCLK range");
|
||||
goto exit_vf_table;
|
||||
}
|
||||
|
||||
table->gpc2clk_num_points = MAX_F_POINTS;
|
||||
table->mclk_num_points = MAX_F_POINTS;
|
||||
if (g->ops.clk.clk_domain_get_f_points(arb->g, CTRL_CLK_DOMAIN_GPCCLK,
|
||||
&table->gpc2clk_num_points, arb->gpc2clk_f_points)) {
|
||||
nvgpu_err(g, "failed to fetch GPC2CLK frequency points");
|
||||
goto exit_vf_table;
|
||||
}
|
||||
if (!table->gpc2clk_num_points) {
|
||||
nvgpu_err(g, "empty queries to f points gpc2clk %d", table->gpc2clk_num_points);
|
||||
status = -EINVAL;
|
||||
goto exit_vf_table;
|
||||
}
|
||||
|
||||
(void) memset(table->gpc2clk_points, 0,
|
||||
table->gpc2clk_num_points*sizeof(struct nvgpu_clk_vf_point));
|
||||
|
||||
p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g,
|
||||
CTRL_PERF_PSTATE_P0, CLKWHICH_GPCCLK);
|
||||
if (!p0_info) {
|
||||
status = -EINVAL;
|
||||
nvgpu_err(g, "failed to get GPC2CLK P0 info");
|
||||
goto exit_vf_table;
|
||||
}
|
||||
|
||||
/* GPC2CLK needs to be checked in two passes. The first determines the
|
||||
* relationships between GPC2CLK, SYS2CLK and XBAR2CLK, while the
|
||||
* second verifies that the clocks' minimum is satisfied and sets
* the voltages; the latter part is done in nvgpu_pmu_perf_changeseq_set_clks.
|
||||
*/
|
||||
j = 0; num_points = 0; clk_cur = 0;
|
||||
for (i = 0; i < table->gpc2clk_num_points; i++) {
|
||||
struct nvgpu_clk_slave_freq setfllclk;
|
||||
|
||||
if ((arb->gpc2clk_f_points[i] >= arb->gpc2clk_min) &&
|
||||
(arb->gpc2clk_f_points[i] <= arb->gpc2clk_max) &&
|
||||
(arb->gpc2clk_f_points[i] != clk_cur)) {
|
||||
|
||||
table->gpc2clk_points[j].gpc_mhz =
|
||||
arb->gpc2clk_f_points[i];
|
||||
setfllclk.gpc_mhz = arb->gpc2clk_f_points[i];
|
||||
|
||||
status = clk_get_fll_clks_per_clk_domain(g, &setfllclk);
|
||||
if (status < 0) {
|
||||
nvgpu_err(g,
|
||||
"failed to get GPC2CLK slave clocks");
|
||||
goto exit_vf_table;
|
||||
}
|
||||
|
||||
table->gpc2clk_points[j].sys_mhz =
|
||||
setfllclk.sys_mhz;
|
||||
table->gpc2clk_points[j].xbar_mhz =
|
||||
setfllclk.xbar_mhz;
|
||||
table->gpc2clk_points[j].nvd_mhz =
|
||||
setfllclk.nvd_mhz;
|
||||
table->gpc2clk_points[j].host_mhz =
|
||||
setfllclk.host_mhz;
|
||||
|
||||
clk_cur = table->gpc2clk_points[j].gpc_mhz;
|
||||
|
||||
if ((clk_cur >= p0_info->min_mhz) &&
|
||||
(clk_cur <= p0_info->max_mhz)) {
|
||||
VF_POINT_SET_PSTATE_SUPPORTED(
|
||||
&table->gpc2clk_points[j],
|
||||
CTRL_PERF_PSTATE_P0);
|
||||
}
|
||||
|
||||
j++;
|
||||
num_points++;
|
||||
}
|
||||
}
|
||||
table->gpc2clk_num_points = num_points;
|
||||
|
||||
/* make table visible when all data has resolved in the tables */
|
||||
nvgpu_smp_wmb();
|
||||
arb->current_vf_table = table;
|
||||
|
||||
exit_vf_table:
|
||||
|
||||
if (status < 0) {
|
||||
nvgpu_clk_arb_set_global_alarm(g,
|
||||
EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
|
||||
}
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct gk20a *g = arb->g;
|
||||
int err;
|
||||
|
||||
/* get latest vf curve from pmu */
|
||||
err = nvgpu_clk_vf_point_cache(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to cache VF table");
|
||||
nvgpu_clk_arb_set_global_alarm(g,
|
||||
EVENT(ALARM_VF_TABLE_UPDATE_FAILED));
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
|
||||
|
||||
return;
|
||||
}
|
||||
nvgpu_clk_arb_update_vf_table(arb);
|
||||
}
|
||||
#endif
|
||||
u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
|
||||
struct nvgpu_clk_arb_target *target,
|
||||
u32 alarm) {
|
||||
|
||||
struct nvgpu_clk_session *session = dev->session;
|
||||
struct nvgpu_clk_arb *arb = session->g->clk_arb;
|
||||
struct nvgpu_clk_notification *l_notification;
|
||||
|
||||
u32 queue_alarm_mask = 0;
|
||||
u32 enabled_mask = 0;
|
||||
u32 new_alarms_reported = 0;
|
||||
u32 poll_mask = 0;
|
||||
u32 tail, head, index;
|
||||
u32 queue_index;
|
||||
size_t size;
|
||||
|
||||
enabled_mask = (u32)nvgpu_atomic_read(&dev->enabled_mask);
|
||||
size = arb->notification_queue.size;
|
||||
|
||||
/* queue global arbiter notifications in buffer */
|
||||
do {
|
||||
tail = (u32)nvgpu_atomic_read(&arb->notification_queue.tail);
|
||||
/* copy items to the queue */
|
||||
queue_index = (u32)nvgpu_atomic_read(&dev->queue.tail);
|
||||
head = dev->arb_queue_head;
|
||||
head = (tail - head) < arb->notification_queue.size ?
|
||||
head : tail - arb->notification_queue.size;
|
||||
|
||||
for (index = head; WRAPGTEQ(tail, index); index++) {
|
||||
u32 alarm_detected;
|
||||
|
||||
l_notification = &arb->notification_queue.
|
||||
clk_q_notifications[((u64)index + 1ULL) % size];
|
||||
alarm_detected = NV_READ_ONCE(
|
||||
l_notification->clk_notification);
|
||||
|
||||
if ((enabled_mask & alarm_detected) == 0U) {
|
||||
continue;
|
||||
}
|
||||
|
||||
queue_index++;
|
||||
dev->queue.clk_q_notifications[
|
||||
queue_index % dev->queue.size].timestamp =
|
||||
NV_READ_ONCE(l_notification->timestamp);
|
||||
|
||||
dev->queue.clk_q_notifications[queue_index %
|
||||
dev->queue.size].clk_notification =
|
||||
alarm_detected;
|
||||
|
||||
queue_alarm_mask |= alarm_detected;
|
||||
}
|
||||
} while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
|
||||
(int)tail));
|
||||
|
||||
nvgpu_atomic_set(&dev->queue.tail, (int)queue_index);
|
||||
/* update the last notification we processed from global queue */
|
||||
|
||||
dev->arb_queue_head = tail;
|
||||
|
||||
/* Check if current session targets are met */
|
||||
if ((enabled_mask & EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE)) != 0U) {
|
||||
if ((target->gpc2clk < session->target->gpc2clk)
|
||||
|| (target->mclk < session->target->mclk)) {
|
||||
|
||||
poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
|
||||
nvgpu_clk_arb_queue_notification(arb->g, &dev->queue,
|
||||
EVENT(ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE));
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if there is a new VF update */
|
||||
if ((queue_alarm_mask & EVENT(VF_UPDATE)) != 0U) {
|
||||
poll_mask |= (NVGPU_POLLIN | NVGPU_POLLRDNORM);
|
||||
}
|
||||
|
||||
/* Notify sticky alarms that were not reported on the previous run */
|
||||
new_alarms_reported = (queue_alarm_mask |
|
||||
(alarm & ~dev->alarms_reported & queue_alarm_mask));
|
||||
|
||||
if ((new_alarms_reported & ~LOCAL_ALARM_MASK) != 0U) {
|
||||
/* check that we are not re-reporting */
|
||||
if ((new_alarms_reported & EVENT(ALARM_GPU_LOST)) != 0U) {
|
||||
poll_mask |= NVGPU_POLLHUP;
|
||||
}
|
||||
|
||||
poll_mask |= (NVGPU_POLLIN | NVGPU_POLLPRI);
|
||||
/* On next run do not report global alarms that were already
|
||||
* reported, but report SHUTDOWN always
|
||||
*/
|
||||
dev->alarms_reported = new_alarms_reported & ~LOCAL_ALARM_MASK &
|
||||
~EVENT(ALARM_GPU_LOST);
|
||||
}
|
||||
|
||||
if (poll_mask != 0U) {
|
||||
nvgpu_atomic_set(&dev->poll_mask, (int)poll_mask);
|
||||
nvgpu_clk_arb_event_post_event(dev);
|
||||
}
|
||||
|
||||
return new_alarms_reported;
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
u64 current_mask;
|
||||
u32 refcnt;
|
||||
u32 alarm_mask;
|
||||
u64 new_mask;
|
||||
|
||||
do {
|
||||
current_mask = (u64)nvgpu_atomic64_read(&arb->alarm_mask);
|
||||
/* atomic operations are strong so they do not need masks */
|
||||
|
||||
refcnt = ((u32) (current_mask >> 32)) + 1U;
|
||||
alarm_mask = (u32) ((u32)current_mask & ~alarm);
|
||||
new_mask = ((u64) refcnt << 32) | alarm_mask;
|
||||
|
||||
} while (unlikely(current_mask !=
|
||||
(u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
|
||||
(long int)current_mask, (long int)new_mask)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Process one scheduled work item.
|
||||
*/
|
||||
static void nvgpu_clk_arb_worker_poll_wakeup_process_item(
|
||||
struct nvgpu_list_node *work_item)
|
||||
{
|
||||
struct nvgpu_clk_arb_work_item *clk_arb_work_item =
|
||||
nvgpu_clk_arb_work_item_from_worker_item(work_item);
|
||||
|
||||
struct gk20a *g = clk_arb_work_item->arb->g;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
if (clk_arb_work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE) {
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
nvgpu_clk_arb_run_vf_table_cb(clk_arb_work_item->arb);
|
||||
#endif
|
||||
} else {
|
||||
if (clk_arb_work_item->item_type == CLK_ARB_WORK_UPDATE_ARB) {
|
||||
g->ops.clk_arb.clk_arb_run_arbiter_cb(
|
||||
clk_arb_work_item->arb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void nvgpu_clk_arb_worker_poll_init(struct nvgpu_worker *worker)
|
||||
{
|
||||
clk_arb_dbg(worker->g, " ");
|
||||
}
|
||||
|
||||
const struct nvgpu_worker_ops clk_arb_worker_ops = {
|
||||
.pre_process = nvgpu_clk_arb_worker_poll_init,
|
||||
.wakeup_early_exit = nvgpu_worker_should_stop,
|
||||
.wakeup_post_process = NULL,
|
||||
.wakeup_process_item =
|
||||
nvgpu_clk_arb_worker_poll_wakeup_process_item,
|
||||
.wakeup_condition =
|
||||
nvgpu_worker_should_stop,
|
||||
.wakeup_timeout = NULL,
|
||||
};
|
||||
|
||||
/**
|
||||
* Append a work item to the worker's list.
|
||||
*
|
||||
* This adds work item to the end of the list and wakes the worker
|
||||
up immediately. If the work item already exists in the list, it is not added,
|
||||
* because in that case it has been scheduled already but has not yet been
|
||||
* processed.
|
||||
*/
|
||||
void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
|
||||
struct nvgpu_clk_arb_work_item *work_item)
|
||||
{
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
(void)nvgpu_worker_enqueue(&g->clk_arb_worker.worker,
|
||||
&work_item->worker_item);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the clk arb worker's metadata and start the background thread.
|
||||
*/
|
||||
int nvgpu_clk_arb_worker_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_worker *worker = &g->clk_arb_worker.worker;
|
||||
|
||||
nvgpu_worker_init_name(worker, "nvgpu_clk_arb_poll", g->name);
|
||||
|
||||
return nvgpu_worker_init(g, worker, &clk_arb_worker_ops);
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (g->ops.clk_arb.check_clk_arb_support != NULL) {
|
||||
if (!g->ops.clk_arb.check_clk_arb_support(g)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&g->clk_arb_enable_lock);
|
||||
|
||||
err = g->ops.clk_arb.arbiter_clk_init(g);
|
||||
|
||||
nvgpu_mutex_release(&g->clk_arb_enable_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
bool nvgpu_clk_arb_has_active_req(struct gk20a *g)
|
||||
{
|
||||
return (nvgpu_atomic_read(&g->clk_arb_global_nr) > 0);
|
||||
}
|
||||
|
||||
static void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
nvgpu_clk_arb_set_global_alarm(g, alarm);
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
if (arb != NULL) {
|
||||
nvgpu_clk_arb_schedule_alarm(g,
|
||||
BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_worker *worker = &g->clk_arb_worker.worker;
|
||||
|
||||
nvgpu_worker_deinit(worker);
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
nvgpu_mutex_acquire(&g->clk_arb_enable_lock);
|
||||
|
||||
if (arb != NULL) {
|
||||
g->ops.clk_arb.clk_arb_cleanup(g->clk_arb);
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&g->clk_arb_enable_lock);
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_init_session(struct gk20a *g,
|
||||
struct nvgpu_clk_session **l_session)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
struct nvgpu_clk_session *session = *(l_session);
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
if (g->ops.clk_arb.check_clk_arb_support != NULL) {
|
||||
if (!g->ops.clk_arb.check_clk_arb_support(g)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
session = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_session));
|
||||
if (session == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
session->g = g;
|
||||
|
||||
nvgpu_ref_init(&session->refcount);
|
||||
|
||||
session->zombie = false;
|
||||
session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
|
||||
/* make sure that the initialization of the pool is visible
|
||||
* before the update
|
||||
*/
|
||||
nvgpu_smp_wmb();
|
||||
session->target = &session->target_pool[0];
|
||||
|
||||
nvgpu_init_list_node(&session->targets);
|
||||
nvgpu_spinlock_init(&session->session_lock);
|
||||
|
||||
nvgpu_spinlock_acquire(&arb->sessions_lock);
|
||||
nvgpu_list_add_tail(&session->link, &arb->sessions);
|
||||
nvgpu_spinlock_release(&arb->sessions_lock);
|
||||
|
||||
*l_session = session;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct nvgpu_clk_dev *
|
||||
nvgpu_clk_dev_from_refcount(struct nvgpu_ref *refcount)
|
||||
{
|
||||
return (struct nvgpu_clk_dev *)
|
||||
((uintptr_t)refcount - offsetof(struct nvgpu_clk_dev, refcount));
|
||||
};
|
||||
|
||||
void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
|
||||
{
|
||||
struct nvgpu_clk_dev *dev = nvgpu_clk_dev_from_refcount(refcount);
|
||||
struct nvgpu_clk_session *session = dev->session;
|
||||
struct gk20a *g = session->g;
|
||||
|
||||
nvgpu_clk_notification_queue_free(g, &dev->queue);
|
||||
|
||||
nvgpu_atomic_dec(&g->clk_arb_global_nr);
|
||||
nvgpu_kfree(g, dev);
|
||||
}
|
||||
|
||||
static struct nvgpu_clk_session *
|
||||
nvgpu_clk_session_from_refcount(struct nvgpu_ref *refcount)
|
||||
{
|
||||
return (struct nvgpu_clk_session *)
|
||||
((uintptr_t)refcount - offsetof(struct nvgpu_clk_session, refcount));
|
||||
};
|
||||
|
||||
void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
|
||||
{
|
||||
struct nvgpu_clk_session *session =
|
||||
nvgpu_clk_session_from_refcount(refcount);
|
||||
struct nvgpu_clk_arb *arb = session->g->clk_arb;
|
||||
struct gk20a *g = session->g;
|
||||
struct nvgpu_clk_dev *dev, *tmp;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
if (arb != NULL) {
|
||||
nvgpu_spinlock_acquire(&arb->sessions_lock);
|
||||
nvgpu_list_del(&session->link);
|
||||
nvgpu_spinlock_release(&arb->sessions_lock);
|
||||
}
|
||||
|
||||
nvgpu_spinlock_acquire(&session->session_lock);
|
||||
nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
|
||||
nvgpu_clk_dev, node) {
|
||||
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
|
||||
nvgpu_list_del(&dev->node);
|
||||
}
|
||||
nvgpu_spinlock_release(&session->session_lock);
|
||||
|
||||
nvgpu_kfree(g, session);
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_release_session(struct gk20a *g,
|
||||
struct nvgpu_clk_session *session)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
session->zombie = true;
|
||||
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
|
||||
if (arb != NULL) {
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
|
||||
}
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
|
||||
}
|
||||
|
||||
/* This function is inherently unsafe to call while the arbiter is running;
* the arbiter must be blocked before calling this function.
|
||||
*/
|
||||
u32 nvgpu_clk_arb_get_current_pstate(struct gk20a *g)
|
||||
{
|
||||
return NV_READ_ONCE(g->clk_arb->actual->pstate);
|
||||
}
|
||||
|
||||
void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
|
||||
if (lock) {
|
||||
nvgpu_mutex_acquire(&arb->pstate_lock);
|
||||
} else {
|
||||
nvgpu_mutex_release(&arb->pstate_lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
bool nvgpu_clk_arb_is_valid_domain(struct gk20a *g, u32 api_domain)
|
||||
{
|
||||
u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
|
||||
bool ret_result = false;
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
ret_result = ((clk_domains & CTRL_CLK_DOMAIN_MCLK) != 0U) ?
|
||||
true : false;
|
||||
break;
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
ret_result = ((clk_domains & CTRL_CLK_DOMAIN_GPCCLK) != 0U) ?
|
||||
true : false;
|
||||
break;
|
||||
default:
|
||||
ret_result = false;
|
||||
break;
|
||||
}
|
||||
return ret_result;
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
|
||||
u16 *min_mhz, u16 *max_mhz)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
ret = g->ops.clk_arb.get_arbiter_clk_range(g,
|
||||
CTRL_CLK_DOMAIN_MCLK, min_mhz, max_mhz);
|
||||
break;
|
||||
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
ret = g->ops.clk_arb.get_arbiter_clk_range(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK, min_mhz, max_mhz);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g,
|
||||
u32 api_domain, u32 *max_points, u16 *fpoints)
|
||||
{
|
||||
int err = -EINVAL;
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
err = g->ops.clk_arb.get_arbiter_f_points(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK, max_points, fpoints);
|
||||
if ((err != 0) || (fpoints == NULL)) {
|
||||
break;
|
||||
}
|
||||
err = 0;
|
||||
break;
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
err = g->ops.clk_arb.get_arbiter_f_points(g,
|
||||
CTRL_CLK_DOMAIN_MCLK, max_points, fpoints);
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
|
||||
u32 api_domain, u16 *target_mhz)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_clk_arb_target *target = session->target;
|
||||
|
||||
if (!nvgpu_clk_arb_is_valid_domain(session->g, api_domain)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
*target_mhz = target->mclk;
|
||||
break;
|
||||
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
*target_mhz = target->gpc2clk;
|
||||
break;
|
||||
|
||||
default:
|
||||
*target_mhz = 0;
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g,
|
||||
u32 api_domain, u16 *actual_mhz)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||
int err = 0;
|
||||
struct nvgpu_clk_arb_target *actual = arb->actual;
|
||||
|
||||
if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
*actual_mhz = actual->mclk;
|
||||
break;
|
||||
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
*actual_mhz = actual->gpc2clk;
|
||||
break;
|
||||
|
||||
default:
|
||||
*actual_mhz = 0;
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
unsigned long nvgpu_clk_measure_freq(struct gk20a *g, u32 api_domain)
|
||||
{
|
||||
unsigned long freq = 0UL;
|
||||
|
||||
switch (api_domain) {
|
||||
/*
|
||||
* In case of iGPU, clocks to each partition (GPC, SYS, LTC, XBAR) are
|
||||
* generated using 1X GPCCLK and hence should be the same.
|
||||
*/
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
case CTRL_CLK_DOMAIN_SYSCLK:
|
||||
case CTRL_CLK_DOMAIN_XBARCLK:
|
||||
freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
|
||||
break;
|
||||
default:
|
||||
freq = 0UL;
|
||||
break;
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
|
||||
int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g,
|
||||
u32 api_domain, u16 *effective_mhz)
|
||||
{
|
||||
u64 freq_mhz_u64;
|
||||
int err = -EINVAL;
|
||||
|
||||
if (!nvgpu_clk_arb_is_valid_domain(g, api_domain)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (api_domain) {
|
||||
case NVGPU_CLK_DOMAIN_MCLK:
|
||||
freq_mhz_u64 = g->ops.clk.measure_freq(g,
|
||||
CTRL_CLK_DOMAIN_MCLK) / 1000000ULL;
|
||||
err = 0;
|
||||
break;
|
||||
|
||||
case NVGPU_CLK_DOMAIN_GPCCLK:
|
||||
freq_mhz_u64 = g->ops.clk.measure_freq(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK) / 1000000ULL;
|
||||
err = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
nvgpu_assert(freq_mhz_u64 <= (u64)U16_MAX);
|
||||
*effective_mhz = (u16)freq_mhz_u64;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
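One detail of clk_arb.c above that is easy to miss is how nvgpu_clk_arb_set_global_alarm() and nvgpu_clk_arb_clear_global_alarm() pack a sequence count and the alarm bitmask into a single 64-bit word so a lockless cmpxchg loop can update both at once: the upper 32 bits hold a counter bumped on every update, the lower 32 bits hold the alarm bits. The standalone sketch below reproduces only that packing arithmetic in plain C so the bit layout can be checked outside the driver; it is illustrative and not part of this release.

/* Illustrative, standalone reproduction of the alarm_mask packing. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* refcnt (sequence) = 7 in the upper half, alarms 0x5 in the lower half */
	uint64_t current_mask = ((uint64_t)7 << 32) | 0x5ULL;
	uint32_t alarm = 0x2U;	/* new alarm bit to set */

	uint32_t refcnt = (uint32_t)(current_mask >> 32) + 1U;	/* bump the sequence */
	uint32_t alarm_mask = (uint32_t)(current_mask & 0xffffffffULL) | alarm;
	uint64_t new_mask = ((uint64_t)refcnt << 32) | alarm_mask;

	/* prints: refcnt=8 alarms=0x7 packed=0x800000007 */
	printf("refcnt=%u alarms=0x%x packed=0x%llx\n",
	       refcnt, alarm_mask, (unsigned long long)new_mask);
	return 0;
}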
406
drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.c
Normal file
@@ -0,0 +1,406 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/clk_arb.h>
|
||||
#include <nvgpu/pmu/clk/clk.h>
|
||||
|
||||
#include "clk_arb_gp10b.h"
|
||||
|
||||
bool gp10b_check_clk_arb_support(struct gk20a *g)
|
||||
{
|
||||
if (g->ops.clk_arb.get_arbiter_clk_domains != NULL) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u32 gp10b_get_arbiter_clk_domains(struct gk20a *g)
|
||||
{
|
||||
(void)g;
|
||||
clk_arb_dbg(g, " ");
|
||||
return CTRL_CLK_DOMAIN_GPCCLK;
|
||||
}
|
||||
|
||||
int gp10b_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
|
||||
u32 *num_points, u16 *freqs_in_mhz)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
switch (api_domain) {
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
ret = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
|
||||
num_points, freqs_in_mhz);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
|
||||
u16 *min_mhz, u16 *max_mhz)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
switch (api_domain) {
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
ret = g->ops.clk.get_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK,
|
||||
min_mhz, max_mhz);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
|
||||
u16 *default_mhz)
|
||||
{
|
||||
int ret = 0;
|
||||
u16 min_mhz, max_mhz;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
switch (api_domain) {
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
ret = gp10b_get_arbiter_clk_range(g, api_domain,
|
||||
&min_mhz, &max_mhz);
|
||||
|
||||
if (ret == 0) {
|
||||
*default_mhz = max_mhz;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int gp10b_init_clk_arbiter(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb = NULL;
|
||||
u16 default_mhz;
|
||||
int err;
|
||||
int index;
|
||||
struct nvgpu_clk_vf_table *table;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
if (g->clk_arb != NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
|
||||
if (arb == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&arb->pstate_lock);
|
||||
|
||||
nvgpu_spinlock_init(&arb->sessions_lock);
|
||||
nvgpu_spinlock_init(&arb->users_lock);
|
||||
nvgpu_spinlock_init(&arb->requests_lock);
|
||||
|
||||
arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
|
||||
if (arb->gpc2clk_f_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
for (index = 0; index < 2; index++) {
|
||||
table = &arb->vf_table_pool[index];
|
||||
table->gpc2clk_num_points = MAX_F_POINTS;
|
||||
|
||||
table->gpc2clk_points = (struct nvgpu_clk_vf_point *)
|
||||
nvgpu_kcalloc(g, MAX_F_POINTS,
|
||||
sizeof(struct nvgpu_clk_vf_point));
|
||||
if (table->gpc2clk_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
}
|
||||
|
||||
g->clk_arb = arb;
|
||||
arb->g = g;
|
||||
|
||||
err = g->ops.clk_arb.get_arbiter_clk_default(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK, &default_mhz);
|
||||
if (err < 0) {
|
||||
err = -EINVAL;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
arb->gpc2clk_default_mhz = default_mhz;
|
||||
|
||||
err = g->ops.clk_arb.get_arbiter_clk_range(g, CTRL_CLK_DOMAIN_GPCCLK,
|
||||
&arb->gpc2clk_min, &arb->gpc2clk_max);
|
||||
|
||||
if (err < 0) {
|
||||
err = -EINVAL;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
arb->actual = &arb->actual_pool[0];
|
||||
|
||||
nvgpu_atomic_set(&arb->req_nr, 0);
|
||||
|
||||
nvgpu_atomic64_set(&arb->alarm_mask, 0);
|
||||
err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
|
||||
DEFAULT_EVENT_NUMBER);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
nvgpu_init_list_node(&arb->users);
|
||||
nvgpu_init_list_node(&arb->sessions);
|
||||
nvgpu_init_list_node(&arb->requests);
|
||||
|
||||
err = nvgpu_cond_init(&arb->request_wq);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
|
||||
arb->update_arb_work_item.arb = arb;
|
||||
arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
|
||||
|
||||
err = nvgpu_clk_arb_worker_init(g);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
/* This is set for the duration of the default req */
|
||||
nvgpu_atomic_inc(&g->clk_arb_global_nr);
|
||||
|
||||
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
|
||||
|
||||
do {
|
||||
/* Check that first run is completed */
|
||||
nvgpu_smp_mb();
|
||||
NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
|
||||
nvgpu_atomic_read(&arb->req_nr) != 0, 0U);
|
||||
} while (nvgpu_atomic_read(&arb->req_nr) == 0);
|
||||
|
||||
/* Once the default request is completed, reduce the usage count */
|
||||
nvgpu_atomic_dec(&g->clk_arb_global_nr);
|
||||
|
||||
return arb->status;
|
||||
|
||||
init_fail:
|
||||
nvgpu_kfree(g, arb->gpc2clk_f_points);
|
||||
|
||||
for (index = 0; index < 2; index++) {
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
|
||||
}
|
||||
|
||||
nvgpu_mutex_destroy(&arb->pstate_lock);
|
||||
nvgpu_kfree(g, arb);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct nvgpu_clk_session *session;
|
||||
struct nvgpu_clk_dev *dev;
|
||||
struct nvgpu_clk_dev *tmp;
|
||||
struct nvgpu_clk_arb_target *target, *actual;
|
||||
struct gk20a *g = arb->g;
|
||||
|
||||
bool gpc2clk_set;
|
||||
|
||||
int status = 0;
|
||||
unsigned long rounded_rate = 0;
|
||||
|
||||
u16 gpc2clk_target, gpc2clk_session_target;
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
/* Only one arbiter should be running */
|
||||
gpc2clk_target = 0;
|
||||
|
||||
nvgpu_spinlock_acquire(&arb->sessions_lock);
|
||||
nvgpu_list_for_each_entry(session, &arb->sessions,
|
||||
nvgpu_clk_session, link) {
|
||||
if (session->zombie) {
|
||||
continue;
|
||||
}
|
||||
gpc2clk_set = false;
|
||||
target = (session->target == &session->target_pool[0] ?
|
||||
&session->target_pool[1] :
|
||||
&session->target_pool[0]);
|
||||
nvgpu_spinlock_acquire(&session->session_lock);
|
||||
if (!nvgpu_list_empty(&session->targets)) {
|
||||
/* Copy over state */
|
||||
target->gpc2clk = session->target->gpc2clk;
|
||||
/* Query the latest committed request */
|
||||
nvgpu_list_for_each_entry_safe(dev, tmp, &session->targets,
|
||||
nvgpu_clk_dev, node) {
|
||||
if (!gpc2clk_set &&
|
||||
dev->gpc2clk_target_mhz != (u16)0) {
|
||||
target->gpc2clk =
|
||||
dev->gpc2clk_target_mhz;
|
||||
gpc2clk_set = true;
|
||||
}
|
||||
nvgpu_ref_get(&dev->refcount);
|
||||
nvgpu_list_del(&dev->node);
|
||||
nvgpu_spinlock_acquire(&arb->requests_lock);
|
||||
nvgpu_list_add(&dev->node, &arb->requests);
|
||||
nvgpu_spinlock_release(&arb->requests_lock);
|
||||
}
|
||||
session->target = target;
|
||||
}
|
||||
nvgpu_spinlock_release(&session->session_lock);
|
||||
|
||||
gpc2clk_target =
|
||||
gpc2clk_target > session->target->gpc2clk ?
|
||||
gpc2clk_target : session->target->gpc2clk;
|
||||
}
|
||||
nvgpu_spinlock_release(&arb->sessions_lock);
|
||||
|
||||
gpc2clk_target = (gpc2clk_target > (u16)0) ? gpc2clk_target :
|
||||
arb->gpc2clk_default_mhz;
|
||||
|
||||
if (gpc2clk_target < arb->gpc2clk_min) {
|
||||
gpc2clk_target = arb->gpc2clk_min;
|
||||
}
|
||||
|
||||
if (gpc2clk_target > arb->gpc2clk_max) {
|
||||
gpc2clk_target = arb->gpc2clk_max;
|
||||
}
|
||||
|
||||
gpc2clk_session_target = gpc2clk_target;
|
||||
|
||||
if (arb->actual->gpc2clk == gpc2clk_target) {
|
||||
nvgpu_atomic_inc(&arb->req_nr);
|
||||
nvgpu_cond_signal_interruptible(&arb->request_wq);
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&arb->pstate_lock);
|
||||
|
||||
status = g->ops.clk.clk_get_round_rate(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK, gpc2clk_session_target * 1000000UL, &rounded_rate);
|
||||
|
||||
clk_arb_dbg(g, "rounded_rate: %lu\n",
|
||||
rounded_rate);
|
||||
|
||||
if (status < 0) {
|
||||
arb->status = status;
|
||||
nvgpu_mutex_release(&arb->pstate_lock);
|
||||
|
||||
/* make status visible */
|
||||
nvgpu_smp_mb();
|
||||
nvgpu_atomic_inc(&arb->req_nr);
|
||||
nvgpu_cond_signal_interruptible(&arb->request_wq);
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
/* the igpu set_rate accepts freq in Hz */
|
||||
status = g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
|
||||
|
||||
if (status < 0) {
|
||||
arb->status = status;
|
||||
nvgpu_mutex_release(&arb->pstate_lock);
|
||||
|
||||
/* make status visible */
|
||||
nvgpu_smp_mb();
|
||||
nvgpu_atomic_inc(&arb->req_nr);
|
||||
nvgpu_cond_signal_interruptible(&arb->request_wq);
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
actual = ((NV_READ_ONCE(arb->actual)) == &arb->actual_pool[0] ?
|
||||
&arb->actual_pool[1] : &arb->actual_pool[0]);
|
||||
|
||||
/* do not reorder this pointer */
|
||||
nvgpu_smp_rmb();
|
||||
actual->gpc2clk = gpc2clk_target;
|
||||
arb->status = 0;
|
||||
|
||||
/* Make changes visible to other threads */
|
||||
nvgpu_smp_wmb();
|
||||
arb->actual = actual;
|
||||
|
||||
/* status must be visible before atomic inc */
|
||||
nvgpu_smp_wmb();
|
||||
nvgpu_atomic_inc(&arb->req_nr);
|
||||
|
||||
/* Unlock pstate change for PG */
|
||||
nvgpu_mutex_release(&arb->pstate_lock);
|
||||
|
||||
nvgpu_cond_signal_interruptible(&arb->request_wq);
|
||||
|
||||
exit_arb:
|
||||
if (status < 0) {
|
||||
nvgpu_err(g, "Error in arbiter update");
|
||||
}
|
||||
|
||||
/* notify completion for all requests */
|
||||
nvgpu_spinlock_acquire(&arb->requests_lock);
|
||||
nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
|
||||
nvgpu_clk_dev, node) {
|
||||
u32 tmp_poll_mask = NVGPU_POLLIN | NVGPU_POLLRDNORM;
|
||||
nvgpu_atomic_set(&dev->poll_mask,
|
||||
(int)tmp_poll_mask);
|
||||
nvgpu_clk_arb_event_post_event(dev);
|
||||
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
|
||||
nvgpu_list_del(&dev->node);
|
||||
}
|
||||
nvgpu_spinlock_release(&arb->requests_lock);
|
||||
|
||||
clk_arb_dbg(g, "done");
|
||||
}
|
||||
|
||||
void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct gk20a *g = arb->g;
|
||||
int index;
|
||||
|
||||
nvgpu_clk_arb_worker_deinit(g);
|
||||
nvgpu_kfree(g, arb->gpc2clk_f_points);
|
||||
|
||||
for (index = 0; index < 2; index++) {
|
||||
nvgpu_kfree(g,
|
||||
arb->vf_table_pool[index].gpc2clk_points);
|
||||
}
|
||||
|
||||
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
||||
nvgpu_kfree(g, g->clk_arb);
|
||||
|
||||
g->clk_arb = NULL;
|
||||
}
|
||||
40
drivers/gpu/nvgpu/common/clk_arb/clk_arb_gp10b.h
Normal file
@@ -0,0 +1,40 @@
/*
 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef CLK_ARB_GP10B_H
#define CLK_ARB_GP10B_H

struct nvgpu_clk_session;
struct nvgpu_clk_arb;

bool gp10b_check_clk_arb_support(struct gk20a *g);
u32 gp10b_get_arbiter_clk_domains(struct gk20a *g);
int gp10b_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
	u32 *num_points, u16 *freqs_in_mhz);
int gp10b_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
	u16 *min_mhz, u16 *max_mhz);
int gp10b_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
	u16 *default_mhz);
int gp10b_init_clk_arbiter(struct gk20a *g);
void gp10b_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb);
void gp10b_clk_arb_cleanup(struct nvgpu_clk_arb *arb);

#endif /* CLK_ARB_GP10B_H */
560
drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.c
Normal file
@@ -0,0 +1,560 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/clk_arb.h>
|
||||
#include <nvgpu/pmu/clk/clk.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/boardobjgrp_e255.h>
|
||||
#include <nvgpu/pmu/perf.h>
|
||||
|
||||
#include "clk_arb_gv100.h"
|
||||
|
||||
bool gv100_check_clk_arb_support(struct gk20a *g)
|
||||
{
|
||||
if ((g->ops.clk_arb.get_arbiter_clk_domains != NULL) &&
|
||||
nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u32 gv100_get_arbiter_clk_domains(struct gk20a *g)
|
||||
{
|
||||
(void)g;
|
||||
return (CTRL_CLK_DOMAIN_GPCCLK);
|
||||
}
|
||||
|
||||
int gv100_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
|
||||
u32 *num_points, u16 *freqs_in_mhz)
|
||||
{
|
||||
return g->ops.clk.clk_domain_get_f_points(g,
|
||||
api_domain, num_points, freqs_in_mhz);
|
||||
}
|
||||
|
||||
int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
|
||||
u16 *min_mhz, u16 *max_mhz)
|
||||
{
|
||||
u32 clkwhich;
|
||||
struct nvgpu_pmu_perf_pstate_clk_info *p0_info;
|
||||
u16 max_min_freq_mhz;
|
||||
u16 limit_min_mhz;
|
||||
u16 gpcclk_cap_mhz;
|
||||
bool error_status = false;
|
||||
|
||||
switch (api_domain) {
|
||||
case CTRL_CLK_DOMAIN_MCLK:
|
||||
clkwhich = CLKWHICH_MCLK;
|
||||
break;
|
||||
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
clkwhich = CLKWHICH_GPCCLK;
|
||||
break;
|
||||
|
||||
default:
|
||||
error_status = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (error_status == true) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g,
|
||||
CTRL_PERF_PSTATE_P0, clkwhich);
|
||||
if (p0_info == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
limit_min_mhz = p0_info->min_mhz;
|
||||
gpcclk_cap_mhz = p0_info->max_mhz;
|
||||
|
||||
max_min_freq_mhz = nvgpu_pmu_clk_fll_get_min_max_freq(g);
|
||||
/*
|
||||
* When the DVCO min reported from the VBIOS is 0, fall back to
* DVCO_MIN_DEFAULT_MHZ.
|
||||
*/
|
||||
if (max_min_freq_mhz == 0U) {
|
||||
max_min_freq_mhz = DVCO_MIN_DEFAULT_MHZ;
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep the reported GPCCLK minimum above the DVCO minimum: when the
* DVCO min is at or above the P0 minimum, bump the limit to DVCO min + 1.
|
||||
*/
|
||||
if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) {
|
||||
if ((max_min_freq_mhz != 0U) &&
|
||||
(max_min_freq_mhz >= limit_min_mhz)) {
|
||||
limit_min_mhz = nvgpu_safe_cast_u32_to_u16(
|
||||
nvgpu_safe_add_u32(max_min_freq_mhz, 1U));
|
||||
}
|
||||
if ((g->clk_arb->gpc_cap_clkmhz != 0U) &&
|
||||
(p0_info->max_mhz > g->clk_arb->gpc_cap_clkmhz)) {
|
||||
gpcclk_cap_mhz = g->clk_arb->gpc_cap_clkmhz;
|
||||
}
|
||||
}
|
||||
*min_mhz = limit_min_mhz;
|
||||
*max_mhz = gpcclk_cap_mhz;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
|
||||
u16 *default_mhz)
|
||||
{
|
||||
u32 clkwhich;
|
||||
struct nvgpu_pmu_perf_pstate_clk_info *p0_info;
|
||||
bool error_status = false;
|
||||
u16 gpcclk_cap_mhz;
|
||||
|
||||
switch (api_domain) {
|
||||
case CTRL_CLK_DOMAIN_MCLK:
|
||||
clkwhich = CLKWHICH_MCLK;
|
||||
break;
|
||||
|
||||
case CTRL_CLK_DOMAIN_GPCCLK:
|
||||
clkwhich = CLKWHICH_GPCCLK;
|
||||
break;
|
||||
|
||||
default:
|
||||
error_status = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (error_status == true) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
p0_info = nvgpu_pmu_perf_pstate_get_clk_set_info(g,
|
||||
CTRL_PERF_PSTATE_P0, clkwhich);
|
||||
if (p0_info == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
gpcclk_cap_mhz = p0_info->max_mhz;
|
||||
if (api_domain == CTRL_CLK_DOMAIN_GPCCLK) {
|
||||
if ((g->clk_arb->gpc_cap_clkmhz != 0U) &&
|
||||
(p0_info->max_mhz > g->clk_arb->gpc_cap_clkmhz)) {
|
||||
gpcclk_cap_mhz = g->clk_arb->gpc_cap_clkmhz;
|
||||
}
|
||||
}
|
||||
*default_mhz = gpcclk_cap_mhz;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gv100_init_clk_arbiter(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_clk_arb *arb;
|
||||
u16 default_mhz;
|
||||
int err;
|
||||
int index;
|
||||
struct nvgpu_clk_vf_table *table;
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
if (g->clk_arb != NULL) {
|
||||
return 0;
|
||||
}
|
||||
arb = nvgpu_kzalloc(g, sizeof(struct nvgpu_clk_arb));
|
||||
if (arb == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&arb->pstate_lock);
|
||||
nvgpu_spinlock_init(&arb->sessions_lock);
|
||||
nvgpu_spinlock_init(&arb->users_lock);
|
||||
nvgpu_spinlock_init(&arb->requests_lock);
|
||||
|
||||
arb->mclk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
|
||||
if (arb->mclk_f_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
arb->gpc2clk_f_points = nvgpu_kcalloc(g, MAX_F_POINTS, sizeof(u16));
|
||||
if (arb->gpc2clk_f_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
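/*
 * Added note: vf_table_pool[] holds two identically sized tables.
 * The intent appears to be double buffering, so a fresh VF table can
 * be built in one buffer while readers keep using the table that is
 * currently published.
 */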
for (index = 0; index < 2; index++) {
|
||||
table = &arb->vf_table_pool[index];
|
||||
table->gpc2clk_num_points = MAX_F_POINTS;
|
||||
table->mclk_num_points = MAX_F_POINTS;
|
||||
|
||||
table->gpc2clk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
|
||||
sizeof(struct nvgpu_clk_vf_point));
|
||||
if (table->gpc2clk_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
|
||||
table->mclk_points = nvgpu_kcalloc(g, MAX_F_POINTS,
|
||||
sizeof(struct nvgpu_clk_vf_point));
|
||||
if (table->mclk_points == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto init_fail;
|
||||
}
|
||||
}
|
||||
|
||||
g->clk_arb = arb;
|
||||
arb->g = g;
|
||||
|
||||
err = g->ops.clk_arb.get_arbiter_clk_default(g,
|
||||
CTRL_CLK_DOMAIN_MCLK, &default_mhz);
|
||||
if (err < 0) {
|
||||
err = -EINVAL;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
arb->mclk_default_mhz = default_mhz;
|
||||
|
||||
err = g->ops.clk_arb.get_arbiter_clk_default(g,
|
||||
CTRL_CLK_DOMAIN_GPCCLK, &default_mhz);
|
||||
if (err < 0) {
|
||||
err = -EINVAL;
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
arb->gpc2clk_default_mhz = default_mhz;
|
||||
|
||||
arb->actual = &arb->actual_pool[0];
|
||||
|
||||
nvgpu_atomic_set(&arb->req_nr, 0);
|
||||
|
||||
nvgpu_atomic64_set(&arb->alarm_mask, 0);
|
||||
err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
|
||||
DEFAULT_EVENT_NUMBER);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
nvgpu_init_list_node(&arb->users);
|
||||
nvgpu_init_list_node(&arb->sessions);
|
||||
nvgpu_init_list_node(&arb->requests);
|
||||
|
||||
(void)nvgpu_cond_init(&arb->request_wq);
|
||||
|
||||
nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
|
||||
nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
|
||||
arb->update_vf_table_work_item.arb = arb;
|
||||
arb->update_arb_work_item.arb = arb;
|
||||
arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
|
||||
arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;
|
||||
err = nvgpu_clk_arb_worker_init(g);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
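/*
 * Added note (not in the original source): the divide-then-multiply
 * below rounds dgpu_max_clk down to the nearest multiple of
 * FREQ_STEP_SIZE_MHZ. As a purely illustrative example, with a step
 * of 15 MHz a requested cap of 1007 MHz would be rounded to 1005 MHz.
 */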
if (g->dgpu_max_clk != 0U) {
|
||||
g->dgpu_max_clk = (g->dgpu_max_clk /
|
||||
FREQ_STEP_SIZE_MHZ) * FREQ_STEP_SIZE_MHZ;
|
||||
arb->gpc_cap_clkmhz = g->dgpu_max_clk;
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
arb->debug = &arb->debug_pool[0];
|
||||
|
||||
if (!arb->debugfs_set) {
|
||||
if (nvgpu_clk_arb_debugfs_init(g))
|
||||
arb->debugfs_set = true;
|
||||
}
|
||||
#endif
|
||||
err = nvgpu_clk_vf_point_cache(g);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
err = nvgpu_clk_arb_update_vf_table(arb);
|
||||
if (err < 0) {
|
||||
goto init_fail;
|
||||
}
|
||||
|
||||
do {
|
||||
/* Check that first run is completed */
|
||||
nvgpu_smp_mb();
|
||||
NVGPU_COND_WAIT_INTERRUPTIBLE(&arb->request_wq,
|
||||
nvgpu_atomic_read(&arb->req_nr), 0U);
|
||||
} while (nvgpu_atomic_read(&arb->req_nr) == 0);
|
||||
return arb->status;
|
||||
|
||||
init_fail:
|
||||
nvgpu_kfree(g, arb->gpc2clk_f_points);
|
||||
nvgpu_kfree(g, arb->mclk_f_points);
|
||||
|
||||
for (index = 0; index < 2; index++) {
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
|
||||
}
|
||||
|
||||
nvgpu_mutex_destroy(&arb->pstate_lock);
|
||||
nvgpu_kfree(g, arb);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct nvgpu_clk_session *session;
|
||||
struct nvgpu_clk_dev *dev;
|
||||
struct nvgpu_clk_dev *tmp;
|
||||
struct nvgpu_clk_arb_target *target, *actual;
|
||||
struct gk20a *g = arb->g;
|
||||
|
||||
u32 current_pstate = VF_POINT_INVALID_PSTATE;
|
||||
u32 voltuv = 0;
|
||||
bool mclk_set, gpc2clk_set;
|
||||
u32 alarms_notified = 0;
|
||||
u32 current_alarm;
|
||||
int status = 0;
|
||||
/* Temporary variables for checking target frequency */
|
||||
u16 gpc2clk_target, mclk_target;
|
||||
struct nvgpu_clk_slave_freq vf_point;
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
s64 t0, t1;
|
||||
struct nvgpu_clk_arb_debug *debug;
|
||||
|
||||
#endif
|
||||
|
||||
clk_arb_dbg(g, " ");
|
||||
|
||||
/* bail out if gpu is down */
|
||||
if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) {
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
t0 = nvgpu_current_time_ns();
|
||||
#endif
|
||||
|
||||
/* Only one arbiter should be running */
|
||||
gpc2clk_target = 0;
|
||||
mclk_target = 0;
|
||||
nvgpu_spinlock_acquire(&arb->sessions_lock);
|
||||
nvgpu_list_for_each_entry(session, &arb->sessions,
|
||||
nvgpu_clk_session, link) {
|
||||
if (!session->zombie) {
|
||||
mclk_set = false;
|
||||
gpc2clk_set = false;
|
||||
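/*
 * Added note: session->target_pool[] is used as a ping-pong pair.
 * The entry currently published through session->target stays
 * readable while the arbiter fills in the other entry, which is
 * published a few lines below once it is complete.
 */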
target = (session->target == &session->target_pool[0] ?
|
||||
&session->target_pool[1] :
|
||||
&session->target_pool[0]);
|
||||
nvgpu_spinlock_acquire(&session->session_lock);
|
||||
if (!nvgpu_list_empty(&session->targets)) {
|
||||
/* Copy over state */
|
||||
target->mclk = session->target->mclk;
|
||||
target->gpc2clk = session->target->gpc2clk;
|
||||
/* Query the latest committed request */
|
||||
nvgpu_list_for_each_entry_safe(dev, tmp,
|
||||
&session->targets, nvgpu_clk_dev, node) {
|
||||
if ((mclk_set == false) && (dev->mclk_target_mhz != 0U)) {
|
||||
target->mclk =
|
||||
dev->mclk_target_mhz;
|
||||
mclk_set = true;
|
||||
}
|
||||
if ((gpc2clk_set == false) &&
|
||||
(dev->gpc2clk_target_mhz != 0U)) {
|
||||
target->gpc2clk =
|
||||
dev->gpc2clk_target_mhz;
|
||||
gpc2clk_set = true;
|
||||
}
|
||||
nvgpu_ref_get(&dev->refcount);
|
||||
nvgpu_list_del(&dev->node);
|
||||
nvgpu_spinlock_acquire(
|
||||
&arb->requests_lock);
|
||||
nvgpu_list_add(
|
||||
&dev->node, &arb->requests);
|
||||
nvgpu_spinlock_release(&arb->requests_lock);
|
||||
}
|
||||
session->target = target;
|
||||
}
|
||||
nvgpu_spinlock_release(
|
||||
&session->session_lock);
|
||||
|
||||
mclk_target = mclk_target > session->target->mclk ?
|
||||
mclk_target : session->target->mclk;
|
||||
|
||||
gpc2clk_target =
|
||||
gpc2clk_target > session->target->gpc2clk ?
|
||||
gpc2clk_target : session->target->gpc2clk;
|
||||
}
|
||||
}
|
||||
nvgpu_spinlock_release(&arb->sessions_lock);
|
||||
|
||||
gpc2clk_target = (gpc2clk_target > 0U) ? gpc2clk_target :
|
||||
arb->gpc2clk_default_mhz;
|
||||
|
||||
if (gpc2clk_target < arb->gpc2clk_min) {
|
||||
gpc2clk_target = arb->gpc2clk_min;
|
||||
}
|
||||
|
||||
if (gpc2clk_target > arb->gpc2clk_max) {
|
||||
gpc2clk_target = arb->gpc2clk_max;
|
||||
}
|
||||
|
||||
mclk_target = (mclk_target > 0U) ? mclk_target :
|
||||
arb->mclk_default_mhz;
|
||||
|
||||
if (mclk_target < arb->mclk_min) {
|
||||
mclk_target = arb->mclk_min;
|
||||
}
|
||||
|
||||
if (mclk_target > arb->mclk_max) {
|
||||
mclk_target = arb->mclk_max;
|
||||
}
|
||||
|
||||
if ((arb->gpc_cap_clkmhz != 0U) &&
|
||||
(gpc2clk_target > arb->gpc_cap_clkmhz)) {
|
||||
gpc2clk_target = arb->gpc_cap_clkmhz;
|
||||
}
|
||||
|
||||
vf_point.gpc_mhz = gpc2clk_target;
|
||||
status = nvgpu_clk_arb_find_slave_points(arb, &vf_point);
|
||||
if (status != 0) {
|
||||
nvgpu_err(g, "Unable to get slave frequency");
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
status = nvgpu_pmu_perf_changeseq_set_clks(g, &vf_point);
|
||||
if (status != 0) {
|
||||
nvgpu_err(g, "Unable to program frequency");
|
||||
goto exit_arb;
|
||||
}
|
||||
|
||||
actual = NV_READ_ONCE(arb->actual) == &arb->actual_pool[0] ?
|
||||
&arb->actual_pool[1] : &arb->actual_pool[0];
|
||||
|
||||
/* do not reorder this pointer */
|
||||
nvgpu_smp_rmb();
|
||||
actual->gpc2clk = gpc2clk_target;
|
||||
actual->mclk = mclk_target;
|
||||
arb->voltuv_actual = voltuv;
|
||||
actual->pstate = current_pstate;
|
||||
arb->status = status;
|
||||
|
||||
/* Make changes visible to other threads */
|
||||
nvgpu_smp_wmb();
|
||||
arb->actual = actual;
|
||||
|
||||
/* status must be visible before atomic inc */
|
||||
nvgpu_smp_wmb();
|
||||
nvgpu_atomic_inc(&arb->req_nr);
|
||||
|
||||
/* VF Update complete */
|
||||
nvgpu_clk_arb_set_global_alarm(g, EVENT(VF_UPDATE));
|
||||
|
||||
nvgpu_cond_signal_interruptible(&arb->request_wq);
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
t1 = nvgpu_current_time_ns();
|
||||
|
||||
debug = arb->debug == &arb->debug_pool[0] ?
|
||||
&arb->debug_pool[1] : &arb->debug_pool[0];
|
||||
|
||||
memcpy(debug, arb->debug, sizeof(arb->debug_pool[0]));
|
||||
debug->switch_num++;
|
||||
|
||||
if (debug->switch_num == 1) {
|
||||
debug->switch_max = debug->switch_min =
|
||||
debug->switch_avg = (t1-t0)/1000;
|
||||
debug->switch_std = 0;
|
||||
} else {
|
||||
s64 prev_avg;
|
||||
s64 curr = (t1-t0)/1000;
|
||||
|
||||
debug->switch_max = curr > debug->switch_max ?
|
||||
curr : debug->switch_max;
|
||||
debug->switch_min = debug->switch_min ?
|
||||
(curr < debug->switch_min ?
|
||||
curr : debug->switch_min) : curr;
|
||||
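/*
 * Added note: this is a Welford-style running update. switch_avg
 * tracks the running mean of the switch times and switch_std
 * accumulates the sum of squared deviations (M2), so a sample
 * variance could be derived as switch_std / (switch_num - 1).
 */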
prev_avg = debug->switch_avg;
|
||||
debug->switch_avg = (curr +
|
||||
(debug->switch_avg * (debug->switch_num-1))) /
|
||||
debug->switch_num;
|
||||
debug->switch_std +=
|
||||
(curr - debug->switch_avg) * (curr - prev_avg);
|
||||
}
|
||||
/* commit changes before exchanging debug pointer */
|
||||
nvgpu_smp_wmb();
|
||||
arb->debug = debug;
|
||||
#endif
|
||||
|
||||
exit_arb:
|
||||
if (status < 0) {
|
||||
nvgpu_err(g, "Error in arbiter update");
|
||||
nvgpu_clk_arb_set_global_alarm(g,
|
||||
EVENT(ALARM_CLOCK_ARBITER_FAILED));
|
||||
}
|
||||
|
||||
current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
|
||||
/* notify completion for all requests */
|
||||
nvgpu_spinlock_acquire(&arb->requests_lock);
|
||||
nvgpu_list_for_each_entry_safe(dev, tmp, &arb->requests,
|
||||
nvgpu_clk_dev, node) {
|
||||
/* avoid casting composite expression below */
|
||||
u32 tmp_mask = NVGPU_POLLIN | NVGPU_POLLRDNORM;
|
||||
|
||||
nvgpu_atomic_set(&dev->poll_mask, (int)tmp_mask);
|
||||
nvgpu_clk_arb_event_post_event(dev);
|
||||
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
|
||||
nvgpu_list_del(&dev->node);
|
||||
}
|
||||
nvgpu_spinlock_release(&arb->requests_lock);
|
||||
|
||||
nvgpu_atomic_set(&arb->notification_queue.head,
|
||||
nvgpu_atomic_read(&arb->notification_queue.tail));
|
||||
/* notify event for all users */
|
||||
nvgpu_spinlock_acquire(&arb->users_lock);
|
||||
nvgpu_list_for_each_entry(dev, &arb->users, nvgpu_clk_dev, link) {
|
||||
alarms_notified |=
|
||||
nvgpu_clk_arb_notify(dev, arb->actual, current_alarm);
|
||||
}
|
||||
nvgpu_spinlock_release(&arb->users_lock);
|
||||
|
||||
/* clear alarms */
|
||||
nvgpu_clk_arb_clear_global_alarm(g, alarms_notified &
|
||||
~EVENT(ALARM_GPU_LOST));
|
||||
}
|
||||
|
||||
void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb)
|
||||
{
|
||||
struct gk20a *g = arb->g;
|
||||
int index;
|
||||
|
||||
nvgpu_kfree(g, arb->gpc2clk_f_points);
|
||||
nvgpu_kfree(g, arb->mclk_f_points);
|
||||
|
||||
for (index = 0; index < 2; index++) {
|
||||
nvgpu_kfree(g,
|
||||
arb->vf_table_pool[index].gpc2clk_points);
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
|
||||
}
|
||||
|
||||
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
||||
nvgpu_kfree(g, g->clk_arb);
|
||||
|
||||
g->clk_arb = NULL;
|
||||
}
|
||||
|
||||
void gv100_stop_clk_arb_threads(struct gk20a *g)
|
||||
{
|
||||
nvgpu_clk_arb_worker_deinit(g);
|
||||
}
|
||||
42
drivers/gpu/nvgpu/common/clk_arb/clk_arb_gv100.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef CLK_ARB_GV100_H
|
||||
#define CLK_ARB_GV100_H
|
||||
|
||||
struct nvgpu_clk_session;
|
||||
struct nvgpu_clk_arb;
|
||||
|
||||
#define DVCO_MIN_DEFAULT_MHZ 405
|
||||
|
||||
bool gv100_check_clk_arb_support(struct gk20a *g);
|
||||
u32 gv100_get_arbiter_clk_domains(struct gk20a *g);
|
||||
int gv100_get_arbiter_f_points(struct gk20a *g, u32 api_domain,
|
||||
u32 *num_points, u16 *freqs_in_mhz);
|
||||
int gv100_get_arbiter_clk_range(struct gk20a *g, u32 api_domain,
|
||||
u16 *min_mhz, u16 *max_mhz);
|
||||
int gv100_get_arbiter_clk_default(struct gk20a *g, u32 api_domain,
|
||||
u16 *default_mhz);
|
||||
int gv100_init_clk_arbiter(struct gk20a *g);
|
||||
void gv100_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb);
|
||||
void gv100_clk_arb_cleanup(struct nvgpu_clk_arb *arb);
|
||||
void gv100_stop_clk_arb_threads(struct gk20a *g);
|
||||
#endif /* CLK_ARB_GV100_H */
|
||||
169
drivers/gpu/nvgpu/common/cyclestats/cyclestats.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/regops.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/cyclestats.h>
|
||||
|
||||
#include "cyclestats_priv.h"
|
||||
|
||||
static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
|
||||
u32 offset)
|
||||
{
|
||||
/* support only 24-bit 4-byte aligned offsets */
|
||||
bool valid = !(offset & 0xFF000003U);
|
||||
|
||||
if (g->allow_all) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* whitelist check */
|
||||
valid = valid &&
|
||||
is_bar0_global_offset_whitelisted_gk20a(g, offset);
|
||||
/* resource size check in case there was a problem
|
||||
* with allocating the assumed size of bar0 */
|
||||
valid = valid && nvgpu_io_valid_reg(g, offset);
|
||||
return valid;
|
||||
}
|
||||
|
||||
void nvgpu_cyclestats_exec(struct gk20a *g,
|
||||
struct nvgpu_channel *ch, u32 offset)
|
||||
{
|
||||
void *virtual_address;
|
||||
u32 buffer_size;
|
||||
bool exit;
|
||||
|
||||
/* GL will never use payload 0 for cycle state */
|
||||
if ((ch->cyclestate.cyclestate_buffer == NULL) || (offset == 0U)) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
|
||||
|
||||
virtual_address = ch->cyclestate.cyclestate_buffer;
|
||||
buffer_size = ch->cyclestate.cyclestate_buffer_size;
|
||||
exit = false;
|
||||
|
||||
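/*
 * Added summary: the shared buffer is a sequence of variable-sized
 * elements, each starting with a struct share_buffer_head that gives
 * the operation and the element size. The loop below advances by
 * sh_hdr->size per element and stops at an OP_END element or on a
 * validation failure.
 */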
while (!exit) {
|
||||
struct share_buffer_head *sh_hdr;
|
||||
u32 min_element_size;
|
||||
|
||||
/* validate offset */
|
||||
if (offset + sizeof(struct share_buffer_head) > buffer_size ||
|
||||
offset + sizeof(struct share_buffer_head) < offset) {
|
||||
nvgpu_err(g,
|
||||
"cyclestats buffer overrun at offset 0x%x",
|
||||
offset);
|
||||
break;
|
||||
}
|
||||
|
||||
sh_hdr = (struct share_buffer_head *)
|
||||
((char *)virtual_address + offset);
|
||||
|
||||
min_element_size =
|
||||
U32(sh_hdr->operation == OP_END ?
|
||||
sizeof(struct share_buffer_head) :
|
||||
sizeof(struct nvgpu_cyclestate_buffer_elem));
|
||||
|
||||
/* validate sh_hdr->size */
|
||||
if (sh_hdr->size < min_element_size ||
|
||||
offset + sh_hdr->size > buffer_size ||
|
||||
offset + sh_hdr->size < offset) {
|
||||
nvgpu_err(g,
|
||||
"bad cyclestate buffer header size at offset 0x%x",
|
||||
offset);
|
||||
sh_hdr->failed = U32(true);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (sh_hdr->operation) {
|
||||
case OP_END:
|
||||
exit = true;
|
||||
break;
|
||||
|
||||
case BAR0_READ32:
|
||||
case BAR0_WRITE32:
|
||||
{
|
||||
struct nvgpu_cyclestate_buffer_elem *op_elem =
|
||||
(struct nvgpu_cyclestate_buffer_elem *)sh_hdr;
|
||||
bool valid = is_valid_cyclestats_bar0_offset_gk20a(
|
||||
g, op_elem->offset_bar0);
|
||||
u32 raw_reg;
|
||||
u64 mask_orig;
|
||||
u64 v;
|
||||
|
||||
if (!valid) {
|
||||
nvgpu_err(g,
|
||||
"invalid cycletstats op offset: 0x%x",
|
||||
op_elem->offset_bar0);
|
||||
|
||||
exit = true;
|
||||
sh_hdr->failed = U32(exit);
|
||||
break;
|
||||
}
|
||||
|
||||
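/*
 * Added worked example: the expression below builds a mask covering
 * bits [first_bit, last_bit] inclusive. For instance, first_bit = 4
 * and last_bit = 7 yields ((1 << 8) - 1) & ~((1 << 4) - 1), i.e.
 * 0xFF & ~0x0F = 0xF0.
 */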
mask_orig =
|
||||
((1ULL << (op_elem->last_bit + 1)) - 1) &
|
||||
~((1ULL << op_elem->first_bit) - 1);
|
||||
|
||||
raw_reg = nvgpu_readl(g, op_elem->offset_bar0);
|
||||
|
||||
switch (sh_hdr->operation) {
|
||||
case BAR0_READ32:
|
||||
op_elem->data = ((raw_reg & mask_orig)
|
||||
>> op_elem->first_bit);
|
||||
break;
|
||||
|
||||
case BAR0_WRITE32:
|
||||
v = 0;
|
||||
if ((unsigned int)mask_orig !=
|
||||
~((unsigned int)0)) {
|
||||
v = (unsigned int)
|
||||
(raw_reg & ~mask_orig);
|
||||
}
|
||||
|
||||
v |= ((op_elem->data << op_elem->first_bit)
|
||||
& mask_orig);
|
||||
nvgpu_writel(g, op_elem->offset_bar0,
|
||||
(unsigned int)v);
|
||||
break;
|
||||
default:
|
||||
/* nop ok?*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
/* no operation content case */
|
||||
exit = true;
|
||||
break;
|
||||
}
|
||||
sh_hdr->completed = U32(true);
|
||||
offset += sh_hdr->size;
|
||||
}
|
||||
nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
|
||||
}
|
||||
60
drivers/gpu/nvgpu/common/cyclestats/cyclestats_priv.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_CYCLESTATS_PRIV_H
|
||||
#define NVGPU_CYCLESTATS_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
#define MULTICHAR_TAG(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
|
||||
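/*
 * Added example: MULTICHAR_TAG() packs four characters into a u32 with
 * the first argument in the most significant byte, so
 * MULTICHAR_TAG('D', 'O', 'N', 'E') evaluates to 0x444F4E45, the value
 * used for OP_END below.
 */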
|
||||
enum BAR0_DEBUG_OPERATION {
|
||||
BARO_ZERO_NOP = 0,
|
||||
OP_END = MULTICHAR_TAG('D', 'O', 'N', 'E'),
|
||||
BAR0_READ32 = MULTICHAR_TAG('0', 'R', '3', '2'),
|
||||
BAR0_WRITE32 = MULTICHAR_TAG('0', 'W', '3', '2'),
|
||||
};
|
||||
|
||||
struct share_buffer_head {
|
||||
enum BAR0_DEBUG_OPERATION operation;
|
||||
/* size of the operation item */
|
||||
u32 size;
|
||||
u32 completed;
|
||||
u32 failed;
|
||||
u64 context;
|
||||
u64 completion_callback;
|
||||
};
|
||||
|
||||
struct nvgpu_cyclestate_buffer_elem {
|
||||
struct share_buffer_head head;
|
||||
/* in */
|
||||
u64 p_data;
|
||||
u64 p_done;
|
||||
u32 offset_bar0;
|
||||
u16 first_bit;
|
||||
u16 last_bit;
|
||||
/* out */
|
||||
/* keep 64 bits to be consistent */
|
||||
u64 data;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_CYCLESTATS_PRIV_H */
|
||||
241
drivers/gpu/nvgpu/common/debugger.c
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
* Tegra GK20A GPU Debugger/Profiler Driver
|
||||
*
|
||||
* Copyright (c) 2013-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/vm.h>
|
||||
#include <nvgpu/atomic.h>
|
||||
#include <nvgpu/mm.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/debugger.h>
|
||||
#include <nvgpu/profiler.h>
|
||||
#include <nvgpu/power_features/power_features.h>
|
||||
|
||||
/*
|
||||
* API to get first channel from the list of all channels
|
||||
* bound to the debug session
|
||||
*/
|
||||
struct nvgpu_channel *
|
||||
nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
|
||||
{
|
||||
struct dbg_session_channel_data *ch_data;
|
||||
struct nvgpu_channel *ch;
|
||||
struct gk20a *g = dbg_s->g;
|
||||
|
||||
nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
|
||||
if (nvgpu_list_empty(&dbg_s->ch_list)) {
|
||||
nvgpu_mutex_release(&dbg_s->ch_list_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ch_data = nvgpu_list_first_entry(&dbg_s->ch_list,
|
||||
dbg_session_channel_data,
|
||||
ch_entry);
|
||||
ch = g->fifo.channel + ch_data->chid;
|
||||
|
||||
nvgpu_mutex_release(&dbg_s->ch_list_lock);
|
||||
|
||||
return ch;
|
||||
}
|
||||
|
||||
void nvgpu_dbg_gpu_post_events(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct dbg_session_data *session_data;
|
||||
struct dbg_session_gk20a *dbg_s;
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
|
||||
|
||||
/* guard against the session list being modified */
|
||||
nvgpu_mutex_acquire(&ch->dbg_s_lock);
|
||||
|
||||
nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
|
||||
dbg_session_data, dbg_s_entry) {
|
||||
dbg_s = session_data->dbg_s;
|
||||
if (dbg_s->dbg_events.events_enabled) {
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg, "posting event on session id %d",
|
||||
dbg_s->id);
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg, "%d events pending",
|
||||
dbg_s->dbg_events.num_pending_events);
|
||||
|
||||
dbg_s->dbg_events.num_pending_events++;
|
||||
|
||||
nvgpu_dbg_session_post_event(dbg_s);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ch->dbg_s_lock);
|
||||
}
|
||||
|
||||
bool nvgpu_dbg_gpu_broadcast_stop_trigger(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct dbg_session_data *session_data;
|
||||
struct dbg_session_gk20a *dbg_s;
|
||||
bool broadcast = false;
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
|
||||
|
||||
/* guard against the session list being modified */
|
||||
nvgpu_mutex_acquire(&ch->dbg_s_lock);
|
||||
|
||||
nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
|
||||
dbg_session_data, dbg_s_entry) {
|
||||
dbg_s = session_data->dbg_s;
|
||||
if (dbg_s->broadcast_stop_trigger) {
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
|
||||
"stop trigger broadcast enabled");
|
||||
broadcast = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ch->dbg_s_lock);
|
||||
|
||||
return broadcast;
|
||||
}
|
||||
|
||||
void nvgpu_dbg_gpu_clear_broadcast_stop_trigger(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct dbg_session_data *session_data;
|
||||
struct dbg_session_gk20a *dbg_s;
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
|
||||
|
||||
/* guard against the session list being modified */
|
||||
nvgpu_mutex_acquire(&ch->dbg_s_lock);
|
||||
|
||||
nvgpu_list_for_each_entry(session_data, &ch->dbg_s_list,
|
||||
dbg_session_data, dbg_s_entry) {
|
||||
dbg_s = session_data->dbg_s;
|
||||
if (dbg_s->broadcast_stop_trigger) {
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn | gpu_dbg_intr,
|
||||
"stop trigger broadcast disabled");
|
||||
dbg_s->broadcast_stop_trigger = false;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&ch->dbg_s_lock);
|
||||
}
|
||||
|
||||
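/*
 * Added commentary: dbg_powergating_disabled_refcount counts how many
 * debug sessions currently want powergating disabled. The hardware
 * state is only changed on the 0 -> 1 and 1 -> 0 transitions of that
 * refcount; other callers merely adjust the count.
 */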
u32 nvgpu_set_powergate_locked(struct dbg_session_gk20a *dbg_s,
|
||||
bool mode)
|
||||
{
|
||||
u32 err = 0U;
|
||||
struct gk20a *g = dbg_s->g;
|
||||
|
||||
if (dbg_s->is_pg_disabled != mode) {
|
||||
if (mode == false) {
|
||||
g->dbg_powergating_disabled_refcount--;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow powergate disable or enable only if
|
||||
* the global pg disabled refcount is zero
|
||||
*/
|
||||
if (g->dbg_powergating_disabled_refcount == 0) {
|
||||
err = g->ops.debugger.dbg_set_powergate(dbg_s,
|
||||
mode);
|
||||
}
|
||||
|
||||
if (mode) {
|
||||
g->dbg_powergating_disabled_refcount++;
|
||||
}
|
||||
|
||||
dbg_s->is_pg_disabled = mode;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_dbg_set_powergate(struct dbg_session_gk20a *dbg_s, bool disable_powergate)
|
||||
{
|
||||
int err = 0;
|
||||
struct gk20a *g = dbg_s->g;
|
||||
|
||||
/* This function must be called with g->dbg_sessions_lock held */
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s",
|
||||
g->name, disable_powergate ? "disable" : "enable");
|
||||
|
||||
/*
|
||||
* Powergate mode here refers to railgate + powergate + clockgate,
* so if slcg/blcg/elcg are disabled and railgating is enabled,
* disable railgating and then set is_pg_disabled = true.
* Similarly, when the mode is re-enabled, re-enable railgating but
* not the other features if they were not enabled to begin with.
|
||||
*/
|
||||
if (disable_powergate) {
|
||||
/* save off current powergate, clk state.
|
||||
* set gpu module's can_powergate = 0.
|
||||
* set gpu module's clk to max.
|
||||
* while *a* debug session is active there will be no power or
|
||||
* clocking state changes allowed from mainline code (but they
|
||||
* should be saved).
|
||||
*/
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
|
||||
"module busy");
|
||||
err = gk20a_busy(g);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
err = nvgpu_cg_pg_disable(g);
|
||||
#endif
|
||||
if (err == 0) {
|
||||
dbg_s->is_pg_disabled = true;
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
|
||||
"pg disabled");
|
||||
}
|
||||
} else {
|
||||
/* restore (can) powergate, clk state */
|
||||
/* release pending exceptions to fault/be handled as usual */
|
||||
/*TBD: ordering of these? */
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
err = nvgpu_cg_pg_enable(g);
|
||||
#endif
|
||||
if (err == 0) {
|
||||
dbg_s->is_pg_disabled = false;
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn,
|
||||
"pg enabled");
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg | gpu_dbg_fn, "module idle");
|
||||
|
||||
gk20a_idle(g);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_gpu_dbg, "%s powergate mode = %s done",
|
||||
g->name, disable_powergate ? "disable" : "enable");
|
||||
return err;
|
||||
}
|
||||
338
drivers/gpu/nvgpu/common/device.c
Normal file
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/device.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/log.h>
|
||||
|
||||
#define device_dbg(g, fmt, args...) \
|
||||
do { \
|
||||
nvgpu_log(g, gpu_dbg_device, fmt, ##args); \
|
||||
} while (0)
|
||||
|
||||
static inline const char *nvgpu_device_type_to_str(const struct nvgpu_device *dev)
|
||||
{
|
||||
const char *str = "Unknown";
|
||||
|
||||
switch (dev->type) {
|
||||
case NVGPU_DEVTYPE_GRAPHICS:
|
||||
str = "GFX";
|
||||
break;
|
||||
case NVGPU_DEVTYPE_COPY0:
|
||||
str = "CE0";
|
||||
break;
|
||||
case NVGPU_DEVTYPE_COPY1:
|
||||
str = "CE1";
|
||||
break;
|
||||
case NVGPU_DEVTYPE_COPY2:
|
||||
str = "CE2";
|
||||
break;
|
||||
case NVGPU_DEVTYPE_IOCTRL:
|
||||
str = "IOCTRL";
|
||||
break;
|
||||
case NVGPU_DEVTYPE_LCE:
|
||||
str = "LCE";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
void nvgpu_device_dump_dev(struct gk20a *g, const struct nvgpu_device *dev)
|
||||
{
|
||||
device_dbg(g, "Device %s:%d",
|
||||
nvgpu_device_type_to_str(dev), dev->inst_id);
|
||||
device_dbg(g, " EngineID: %2u FaultID: %2u",
|
||||
dev->engine_id, dev->fault_id);
|
||||
device_dbg(g, " RunlistID: %2u IntrID: %2u ResetID: %u",
|
||||
dev->runlist_id, dev->intr_id, dev->reset_id);
|
||||
device_dbg(g, " PRI Base: 0x%x", dev->pri_base);
|
||||
}
|
||||
|
||||
/*
|
||||
* Facilitate the parsing of the TOP array describing the devices present in the
|
||||
* GPU.
|
||||
*/
|
||||
static int nvgpu_device_parse_hw_table(struct gk20a *g)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 token = NVGPU_DEVICE_TOKEN_INIT;
|
||||
struct nvgpu_device *dev;
|
||||
struct nvgpu_list_node *devlist;
|
||||
|
||||
while (true) {
|
||||
dev = g->ops.top.parse_next_device(g, &token);
|
||||
if (dev == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
nvgpu_device_dump_dev(g, dev);
|
||||
|
||||
/*
|
||||
* Otherwise we have a device - let's add it to the right device
|
||||
* list.
|
||||
*/
|
||||
devlist = &g->devs->devlist_heads[dev->type];
|
||||
|
||||
nvgpu_list_add_tail(&dev->dev_list_node, devlist);
|
||||
g->devs->dev_counts[dev->type] += 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Facilitate reading the HW register table into a software abstraction. This is
|
||||
* done only on the first boot as the table will never change dynamically.
|
||||
*/
|
||||
int nvgpu_device_init(struct gk20a *g)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
device_dbg(g, "Initializating GPU device list");
|
||||
|
||||
/*
|
||||
* Ground work - make sure we aren't doing this again and that we have
|
||||
* all the necessary data structures.
|
||||
*/
|
||||
if (g->devs != NULL) {
|
||||
device_dbg(g, " GPU device list already present. Done.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
g->devs = nvgpu_kzalloc(g, sizeof(*g->devs));
|
||||
if (g->devs == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < NVGPU_MAX_DEVTYPE; i++) {
|
||||
nvgpu_init_list_node(&g->devs->devlist_heads[i]);
|
||||
}
|
||||
|
||||
return nvgpu_device_parse_hw_table(g);
|
||||
}
|
||||
|
||||
static void nvgpu_device_cleanup_devtype(struct gk20a *g,
|
||||
struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_device *dev;
|
||||
|
||||
while (!nvgpu_list_empty(list)) {
|
||||
dev = nvgpu_list_first_entry(list,
|
||||
nvgpu_device,
|
||||
dev_list_node);
|
||||
nvgpu_list_del(&dev->dev_list_node);
|
||||
nvgpu_kfree(g, dev);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_device_cleanup(struct gk20a *g)
|
||||
{
|
||||
u32 i;
|
||||
struct nvgpu_list_node *devlist;
|
||||
|
||||
device_dbg(g, "Releasing GPU device list");
|
||||
|
||||
/*
|
||||
* Make unit testing a bit easier.
|
||||
*/
|
||||
if (g->devs == NULL) {
|
||||
device_dbg(g, " Already done.");
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < NVGPU_MAX_DEVTYPE; i++) {
|
||||
devlist = &g->devs->devlist_heads[i];
|
||||
|
||||
if (devlist == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nvgpu_device_cleanup_devtype(g, devlist);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, g->devs);
|
||||
g->devs = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the instance passed. Do this by simply traversing the linked list; it's
|
||||
* not particularly efficient, but we aren't expecting there to ever be _that_
|
||||
* many devices.
|
||||
*
|
||||
* Return a pointer to the device or NULL if the inst ID is out of range.
|
||||
*/
|
||||
static const struct nvgpu_device *dev_instance_from_devlist(
|
||||
struct nvgpu_list_node *devlist, u32 inst_id)
|
||||
{
|
||||
struct nvgpu_device *dev;
|
||||
|
||||
nvgpu_list_for_each_entry(dev, devlist, nvgpu_device, dev_list_node) {
|
||||
if (dev->inst_id == inst_id) {
|
||||
return dev;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const struct nvgpu_device *nvgpu_device_get(struct gk20a *g,
|
||||
u32 type, u32 inst_id)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
struct nvgpu_list_node *device_list;
|
||||
|
||||
if (type >= NVGPU_MAX_DEVTYPE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
device_list = &g->devs->devlist_heads[type];
|
||||
dev = dev_instance_from_devlist(device_list, inst_id);
|
||||
|
||||
if (dev == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
u32 nvgpu_device_count(struct gk20a *g, u32 type)
|
||||
{
|
||||
if (type >= NVGPU_MAX_DEVTYPE) {
|
||||
return 0U;
|
||||
}
|
||||
|
||||
return g->devs->dev_counts[type];
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal function to query copy engines; async_only specifies whether
|
||||
* this function should or should not include the GR copy engines (CEs that
|
||||
* share a runlist with the GR engine(s)).
|
||||
*
|
||||
* This function basically iterates over two distinct copy engine lists:
|
||||
* first the COPY0-2 (the old way of describing copy engines) and the LCE
|
||||
* list (the newer way of describing copy engines, introduced with Pascal).
|
||||
*/
|
||||
static u32 nvgpu_device_do_get_copies(struct gk20a *g,
|
||||
bool async_only,
|
||||
const struct nvgpu_device **ces,
|
||||
u32 max)
|
||||
{
|
||||
u32 i;
|
||||
u32 copies = 0U;
|
||||
const struct nvgpu_device *dev;
|
||||
const struct nvgpu_device *gr_dev;
|
||||
|
||||
if (max == 0U) {
|
||||
return 0U;
|
||||
}
|
||||
|
||||
gr_dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0U);
|
||||
nvgpu_assert(gr_dev != NULL);
|
||||
|
||||
/*
|
||||
* Start with the COPY0-2 engines. Note the awkward instance ID.
|
||||
*/
|
||||
for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
|
||||
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
|
||||
if (dev == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (async_only &&
|
||||
dev->runlist_id == gr_dev->runlist_id) {
|
||||
/* It's a GRCE, skip it per async_only. */
|
||||
continue;
|
||||
}
|
||||
|
||||
ces[copies] = dev;
|
||||
copies = nvgpu_safe_add_u32(copies, 1U);
|
||||
if (copies == max) {
|
||||
return copies;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
|
||||
nvgpu_assert(dev != NULL);
|
||||
|
||||
if (async_only &&
|
||||
dev->runlist_id == gr_dev->runlist_id) {
|
||||
/* It's a GRCE, skip it per async_only. */
|
||||
continue;
|
||||
}
|
||||
|
||||
ces[copies] = dev;
|
||||
copies = nvgpu_safe_add_u32(copies, 1U);
|
||||
if (copies == max) {
|
||||
return copies;
|
||||
}
|
||||
}
|
||||
|
||||
return copies;
|
||||
}
|
||||
|
||||
u32 nvgpu_device_get_async_copies(struct gk20a *g,
|
||||
const struct nvgpu_device **ces,
|
||||
u32 max)
|
||||
{
|
||||
return nvgpu_device_do_get_copies(g, true, ces, max);
|
||||
}
|
||||
|
||||
u32 nvgpu_device_get_copies(struct gk20a *g,
|
||||
const struct nvgpu_device **ces,
|
||||
u32 max)
|
||||
{
|
||||
return nvgpu_device_do_get_copies(g, false, ces, max);
|
||||
}
|
||||
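/*
 * Illustrative usage sketch (not part of the original file; the local
 * names below are hypothetical): a caller that only wants copy engines
 * which do not share a runlist with the graphics engine might do
 *
 *	const struct nvgpu_device *my_ces[8];
 *	u32 n = nvgpu_device_get_async_copies(g, my_ces, 8U);
 *
 * where at most 8 entries are filled in and n reports how many copy
 * engines were actually found.
 */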
|
||||
/*
|
||||
* Note: this kind of bleeds HW details into the core code. Eventually this
|
||||
* should be handled by a translation table. However, for now, HW has kept the
|
||||
* device type values consistent across chips and nvgpu already has this present
|
||||
* in core code.
|
||||
*
|
||||
* Once a per-chip translation table exists we can translate and then do a
|
||||
* comparison.
|
||||
*/
|
||||
bool nvgpu_device_is_ce(struct gk20a *g, const struct nvgpu_device *dev)
|
||||
{
|
||||
if (dev->type == NVGPU_DEVTYPE_COPY0 ||
|
||||
dev->type == NVGPU_DEVTYPE_COPY1 ||
|
||||
dev->type == NVGPU_DEVTYPE_COPY2 ||
|
||||
dev->type == NVGPU_DEVTYPE_LCE) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool nvgpu_device_is_graphics(struct gk20a *g, const struct nvgpu_device *dev)
|
||||
{
|
||||
return dev->type == NVGPU_DEVTYPE_GRAPHICS;
|
||||
}
|
||||
167
drivers/gpu/nvgpu/common/ecc.c
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/gr/gr_ecc.h>
|
||||
#include <nvgpu/ltc.h>
|
||||
|
||||
void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
nvgpu_init_list_node(&stat->node);
|
||||
|
||||
nvgpu_mutex_acquire(&ecc->stats_lock);
|
||||
|
||||
nvgpu_list_add_tail(&stat->node, &ecc->stats_list);
|
||||
ecc->stats_count = nvgpu_safe_add_s32(ecc->stats_count, 1);
|
||||
|
||||
nvgpu_mutex_release(&ecc->stats_lock);
|
||||
}
|
||||
|
||||
void nvgpu_ecc_stat_del(struct gk20a *g, struct nvgpu_ecc_stat *stat)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
nvgpu_mutex_acquire(&ecc->stats_lock);
|
||||
|
||||
nvgpu_list_del(&stat->node);
|
||||
ecc->stats_count = nvgpu_safe_sub_s32(ecc->stats_count, 1);
|
||||
|
||||
nvgpu_mutex_release(&ecc->stats_lock);
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **statp, const char *name)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stat;
|
||||
|
||||
stat = nvgpu_kzalloc(g, sizeof(*stat));
|
||||
if (stat == NULL) {
|
||||
nvgpu_err(g, "ecc counter alloc failed");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
(void)strncpy(stat->name, name, NVGPU_ECC_STAT_NAME_MAX_SIZE - 1U);
|
||||
nvgpu_ecc_stat_add(g, stat);
|
||||
*statp = stat;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ecc_counter_deinit(struct gk20a *g, struct nvgpu_ecc_stat **statp)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stat;
|
||||
|
||||
if (*statp == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
stat = *statp;
|
||||
|
||||
nvgpu_ecc_stat_del(g, stat);
|
||||
nvgpu_kfree(g, stat);
|
||||
*statp = NULL;
|
||||
}
|
||||
|
||||
/* release all ecc_stat */
|
||||
void nvgpu_ecc_free(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
nvgpu_gr_ecc_free(g);
|
||||
nvgpu_ltc_ecc_free(g);
|
||||
|
||||
if (g->ops.fb.ecc.free != NULL) {
|
||||
g->ops.fb.ecc.free(g);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (g->ops.fb.fbpa_ecc_free != NULL) {
|
||||
g->ops.fb.fbpa_ecc_free(g);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (g->ops.pmu.ecc_free != NULL) {
|
||||
g->ops.pmu.ecc_free(g);
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&ecc->stats_lock);
|
||||
WARN_ON(!nvgpu_list_empty(&ecc->stats_list));
|
||||
nvgpu_mutex_release(&ecc->stats_lock);
|
||||
|
||||
(void)memset(ecc, 0, sizeof(*ecc));
|
||||
}
|
||||
|
||||
int nvgpu_ecc_init_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_ecc *ecc = &g->ecc;
|
||||
|
||||
if (ecc->initialized) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&ecc->stats_lock);
|
||||
nvgpu_init_list_node(&ecc->stats_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Note that this function is to be called after all units requiring ecc stats
|
||||
* have added entries to ecc->stats_list.
|
||||
*/
|
||||
int nvgpu_ecc_finalize_support(struct gk20a *g)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_SYSFS
|
||||
int err;
|
||||
#endif
|
||||
|
||||
if (g->ecc.initialized) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SYSFS
|
||||
err = nvgpu_ecc_sysfs_init(g);
|
||||
if (err != 0) {
|
||||
nvgpu_ecc_free(g);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ecc.initialized = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ecc_remove_support(struct gk20a *g)
|
||||
{
|
||||
if (!g->ecc.initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SYSFS
|
||||
nvgpu_ecc_sysfs_remove(g);
|
||||
#endif
|
||||
nvgpu_ecc_free(g);
|
||||
|
||||
nvgpu_mutex_destroy(&g->ecc.stats_lock);
|
||||
}
|
||||
69
drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.c
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/log.h>
|
||||
|
||||
#include "engine_mem_queue_priv.h"
|
||||
#include "engine_dmem_queue.h"
|
||||
|
||||
/* DMEM-Q specific ops */
|
||||
static int engine_dmem_queue_push(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, u32 dst, void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
int err = 0;
|
||||
|
||||
err = nvgpu_falcon_copy_to_dmem(flcn, dst, data, size, 0);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id);
|
||||
nvgpu_err(g, "dmem queue write failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_dmem_queue_pop(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, u32 src, void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
int err = 0;
|
||||
|
||||
err = nvgpu_falcon_copy_from_dmem(flcn, src, data, size, 0);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id);
|
||||
nvgpu_err(g, "dmem queue read failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* assign DMEM queue type specific ops */
|
||||
void engine_dmem_queue_init(struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
queue->push = engine_dmem_queue_push;
|
||||
queue->pop = engine_dmem_queue_pop;
|
||||
}
|
||||
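/*
 * Added note: the generic engine memory queue code calls through the
 * queue->push / queue->pop pointers assigned above, so DMEM and EMEM
 * backed queues (see engine_emem_queue.c) appear to share the same
 * higher-level logic and differ only in this transport layer.
 */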
28
drivers/gpu/nvgpu/common/engine_queues/engine_dmem_queue.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ENGINE_DMEM_QUEUE_H
|
||||
#define NVGPU_ENGINE_DMEM_QUEUE_H
|
||||
|
||||
void engine_dmem_queue_init(struct nvgpu_engine_mem_queue *queue);
|
||||
|
||||
#endif /* NVGPU_ENGINE_DMEM_QUEUE_H */
|
||||
69
drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.c
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/log.h>
|
||||
|
||||
#include "engine_mem_queue_priv.h"
|
||||
#include "engine_emem_queue.h"
|
||||
|
||||
/* EMEM-Q specific ops */
|
||||
static int engine_emem_queue_push(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, u32 dst, void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
int err = 0;
|
||||
|
||||
err = nvgpu_falcon_copy_to_emem(flcn, dst, data, size, 0);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id);
|
||||
nvgpu_err(g, "emem queue write failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_emem_queue_pop(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, u32 src, void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
int err = 0;
|
||||
|
||||
err = nvgpu_falcon_copy_from_emem(flcn, src, data, size, 0);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d", queue->flcn_id, queue->id);
|
||||
nvgpu_err(g, "emem queue read failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* assign EMEM queue type specific ops */
|
||||
void engine_emem_queue_init(struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
queue->push = engine_emem_queue_push;
|
||||
queue->pop = engine_emem_queue_pop;
|
||||
}
|
||||
28
drivers/gpu/nvgpu/common/engine_queues/engine_emem_queue.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ENGINE_EMEM_QUEUE_H
|
||||
#define NVGPU_ENGINE_EMEM_QUEUE_H
|
||||
|
||||
void engine_emem_queue_init(struct nvgpu_engine_mem_queue *queue);
|
||||
|
||||
#endif /* NVGPU_ENGINE_EMEM_QUEUE_H */
|
||||
603
drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue.c
Normal file
@@ -0,0 +1,603 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/errno.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/flcnif_cmn.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/engine_queue.h>
|
||||
#include <nvgpu/engine_fb_queue.h>
|
||||
#include <nvgpu/pmu/pmuif/cmn.h>
|
||||
|
||||
#include "engine_fb_queue_priv.h"
|
||||
|
||||
/* FB-Q ops */
|
||||
static int engine_fb_queue_head(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 *head, bool set)
|
||||
{
|
||||
return queue->queue_head(queue->g, queue->id, queue->index, head, set);
|
||||
}
|
||||
|
||||
static int engine_fb_queue_tail(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 *tail, bool set)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
int err;
|
||||
|
||||
if (set == false && PMU_IS_COMMAND_QUEUE(queue->id)) {
|
||||
*tail = queue->fbq.tail;
|
||||
err = 0;
|
||||
} else {
|
||||
err = queue->queue_tail(g, queue->id, queue->index, tail, set);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline u32 engine_fb_queue_get_next(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 head)
|
||||
{
|
||||
return (head + 1U) % queue->size;
|
||||
}
|
||||
|
||||
static bool engine_fb_queue_has_room(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 size)
|
||||
{
|
||||
u32 head = 0;
|
||||
u32 tail = 0;
|
||||
u32 next_head = 0;
|
||||
int err = 0;
|
||||
|
||||
err = queue->head(queue, &head, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "queue head GET failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = queue->tail(queue, &tail, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "queue tail GET failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
next_head = engine_fb_queue_get_next(queue, head);
|
||||
|
||||
exit:
|
||||
return next_head != tail;
|
||||
}
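The room check above treats the FB queue as a ring of queue->size elements and always leaves one slot unused: the queue counts as full as soon as advancing head by one element would land on tail. A minimal standalone sketch of that invariant, using a hypothetical 8-element ring (sizes are illustrative, not taken from the driver):

#include <assert.h>

/* Mirrors the (pos + 1) % size step of engine_fb_queue_get_next(). */
static unsigned int ring_next(unsigned int pos, unsigned int size)
{
	return (pos + 1U) % size;
}

int main(void)
{
	unsigned int size = 8U;

	/* head wraps from the last element back to slot 0 */
	assert(ring_next(7U, size) == 0U);

	/* head == 6, tail == 7: next head equals tail, the ring is full */
	assert(ring_next(6U, size) == 7U);

	/* head == 6, tail == 0: next head differs from tail, room remains */
	assert(ring_next(6U, size) != 0U);

	return 0;
}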
|
||||
|
||||
static int engine_fb_queue_write(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 offset, u8 *src, u32 size)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
struct nv_falcon_fbq_hdr *fb_q_hdr = (struct nv_falcon_fbq_hdr *)
|
||||
(void *)queue->fbq.work_buffer;
|
||||
u32 entry_offset = 0U;
|
||||
int err = 0;
|
||||
|
||||
if (queue->fbq.work_buffer == NULL) {
|
||||
nvgpu_err(g, "Invalid/Unallocated work buffer");
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Fill out FBQ hdr, that is in the work buffer */
|
||||
fb_q_hdr->element_index = (u8)offset;
|
||||
|
||||
/* check queue entry size */
|
||||
if (fb_q_hdr->heap_size >= (u16)queue->fbq.element_size) {
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* get offset to this element entry */
|
||||
entry_offset = offset * queue->fbq.element_size;
|
||||
|
||||
/* copy cmd to super-surface */
|
||||
nvgpu_mem_wr_n(g, queue->fbq.super_surface_mem,
|
||||
queue->fbq.fb_offset + entry_offset,
|
||||
queue->fbq.work_buffer, queue->fbq.element_size);
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_fb_queue_set_element_use_state(
|
||||
struct nvgpu_engine_fb_queue *queue, u32 queue_pos, bool set)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (queue_pos >= queue->size) {
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (nvgpu_test_bit(queue_pos,
|
||||
(void *)&queue->fbq.element_in_use) && set) {
|
||||
nvgpu_err(queue->g,
|
||||
"FBQ last received queue element not processed yet"
|
||||
" queue_pos %d", queue_pos);
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (set) {
|
||||
nvgpu_set_bit(queue_pos, (void *)&queue->fbq.element_in_use);
|
||||
} else {
|
||||
nvgpu_clear_bit(queue_pos, (void *)&queue->fbq.element_in_use);
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_fb_queue_is_element_in_use(
|
||||
struct nvgpu_engine_fb_queue *queue,
|
||||
u32 queue_pos, bool *in_use)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (queue_pos >= queue->size) {
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
*in_use = nvgpu_test_bit(queue_pos, (void *)&queue->fbq.element_in_use);
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_fb_queue_sweep(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
u32 head;
|
||||
u32 tail;
|
||||
bool in_use = false;
|
||||
int err = 0;
|
||||
|
||||
tail = queue->fbq.tail;
|
||||
err = queue->head(queue, &head, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* Step from tail forward in the queue,
|
||||
* to see how many consecutive entries
|
||||
* can be made available.
|
||||
*/
|
||||
while (tail != head) {
|
||||
if (engine_fb_queue_is_element_in_use(queue,
|
||||
tail, &in_use) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (in_use) {
|
||||
break;
|
||||
}
|
||||
|
||||
tail = engine_fb_queue_get_next(queue, tail);
|
||||
}
|
||||
|
||||
/* Update tail */
|
||||
queue->fbq.tail = tail;
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_fb_queue_get_position(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
return queue->position;
|
||||
}
|
||||
|
||||
/* return the queue element size */
|
||||
u32 nvgpu_engine_fb_queue_get_element_size(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
return queue->fbq.element_size;
|
||||
}
|
||||
|
||||
/* return the queue offset from super surface FBQ's */
|
||||
u32 nvgpu_engine_fb_queue_get_offset(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
return queue->fbq.fb_offset;
|
||||
}
|
||||
|
||||
/* lock work buffer of queue */
|
||||
void nvgpu_engine_fb_queue_lock_work_buffer(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
/* acquire work buffer mutex */
|
||||
nvgpu_mutex_acquire(&queue->fbq.work_buffer_mutex);
|
||||
}
|
||||
|
||||
/* unlock work buffer of queue */
|
||||
void nvgpu_engine_fb_queue_unlock_work_buffer(
|
||||
struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
/* release work buffer mutex */
|
||||
nvgpu_mutex_release(&queue->fbq.work_buffer_mutex);
|
||||
}
|
||||
|
||||
/* return a pointer of queue work buffer */
|
||||
u8 *nvgpu_engine_fb_queue_get_work_buffer(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
return queue->fbq.work_buffer;
|
||||
}
|
||||
|
||||
int nvgpu_engine_fb_queue_free_element(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 queue_pos)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
err = engine_fb_queue_set_element_use_state(queue,
|
||||
queue_pos, false);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "fb queue elelment %d free failed",
|
||||
queue_pos);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = engine_fb_queue_sweep(queue);
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
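A hedged sketch of the completion path implied above: once a command's response has been consumed, the caller frees the FB queue element, which clears its in-use bit and lets the sweep advance the nvgpu-side fbq.tail past consecutive free entries. The handler name and the response-copy step are placeholders, not a driver API:

/*
 * Hypothetical completion handler: copy the response out of the shared
 * work buffer under its lock, then release the FB queue element so the
 * sweep can move the nvgpu-side tail forward.
 */
static int example_fbq_complete(struct nvgpu_engine_fb_queue *queue,
		u32 element_pos, void *resp, u32 resp_size)
{
	u8 *buf;

	nvgpu_engine_fb_queue_lock_work_buffer(queue);
	buf = nvgpu_engine_fb_queue_get_work_buffer(queue);
	nvgpu_memcpy((u8 *)resp, buf, resp_size);
	nvgpu_engine_fb_queue_unlock_work_buffer(queue);

	return nvgpu_engine_fb_queue_free_element(queue, element_pos);
}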
|
||||
|
||||
/* queue is_empty check with lock */
|
||||
bool nvgpu_engine_fb_queue_is_empty(struct nvgpu_engine_fb_queue *queue)
|
||||
{
|
||||
u32 q_head = 0;
|
||||
u32 q_tail = 0;
|
||||
int err = 0;
|
||||
|
||||
if (queue == NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = queue->head(queue, &q_head, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, head GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = queue->tail(queue, &q_tail, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, tail GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
|
||||
return q_head == q_tail;
|
||||
}
|
||||
|
||||
static int engine_fb_queue_prepare_write(struct nvgpu_engine_fb_queue *queue,
|
||||
u32 size)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* make sure there's enough free space for the write */
|
||||
if (!engine_fb_queue_has_room(queue, size)) {
|
||||
nvgpu_log_info(queue->g, "queue full: queue-id %d: index %d",
|
||||
queue->id, queue->index);
|
||||
err = -EAGAIN;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = queue->head(queue, &queue->position, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* queue push operation with lock */
|
||||
int nvgpu_engine_fb_queue_push(struct nvgpu_engine_fb_queue *queue,
|
||||
void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g;
|
||||
int err = 0;
|
||||
|
||||
if (queue == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = queue->g;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (queue->oflag != OFLAG_WRITE) {
|
||||
nvgpu_err(queue->g, "flcn-%d, queue-%d not opened for write",
|
||||
queue->flcn_id, queue->id);
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = engine_fb_queue_prepare_write(queue, size);
|
||||
if (err != 0) {
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
/* Bounds check size */
|
||||
if (size > queue->fbq.element_size) {
|
||||
nvgpu_err(g, "size too large size=0x%x", size);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
/* Set queue element in use */
|
||||
if (engine_fb_queue_set_element_use_state(queue,
|
||||
queue->position, true) != 0) {
|
||||
nvgpu_err(g,
|
||||
"fb-queue element in use map is in invalid state");
|
||||
err = -EINVAL;
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
/* write data to FB */
|
||||
err = engine_fb_queue_write(queue, queue->position, data, size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "write to fb-queue failed");
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
queue->position = engine_fb_queue_get_next(queue,
|
||||
queue->position);
|
||||
|
||||
err = queue->head(queue, &queue->position, QUEUE_SET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, position SET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
unlock_mutex:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
exit:
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "falcon id-%d, queue id-%d, failed",
|
||||
queue->flcn_id, queue->id);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* queue pop operation with lock */
|
||||
int nvgpu_engine_fb_queue_pop(struct nvgpu_engine_fb_queue *queue,
|
||||
void *data, u32 size, u32 *bytes_read)
|
||||
{
|
||||
struct gk20a *g;
|
||||
struct pmu_hdr *hdr;
|
||||
u32 entry_offset = 0U;
|
||||
int err = 0;
|
||||
|
||||
if (queue == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = queue->g;
|
||||
hdr = (struct pmu_hdr *) (void *) (queue->fbq.work_buffer +
|
||||
sizeof(struct nv_falcon_fbq_msgq_hdr));
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (queue->oflag != OFLAG_READ) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d, not opened for read",
|
||||
queue->flcn_id, queue->id);
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = queue->tail(queue, &queue->position, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, position GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
*bytes_read = 0U;
|
||||
|
||||
/* Check size */
|
||||
if ((size + queue->fbq.read_position) >= queue->fbq.element_size) {
|
||||
nvgpu_err(g,
|
||||
"Attempt to read > than queue element size "
|
||||
"for queue id-%d", queue->id);
|
||||
err = -EINVAL;
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
entry_offset = queue->position * queue->fbq.element_size;
|
||||
|
||||
/*
|
||||
* If first read for this queue element then read whole queue
|
||||
* element into work buffer.
|
||||
*/
|
||||
if (queue->fbq.read_position == 0U) {
|
||||
nvgpu_mem_rd_n(g, queue->fbq.super_surface_mem,
|
||||
/* source (FBQ data) offset*/
|
||||
queue->fbq.fb_offset + entry_offset,
|
||||
/* destination buffer */
|
||||
(void *)queue->fbq.work_buffer,
|
||||
/* copy size */
|
||||
queue->fbq.element_size);
|
||||
|
||||
/* Check size in hdr of MSG just read */
|
||||
if (hdr->size >= queue->fbq.element_size) {
|
||||
nvgpu_err(g, "Super Surface read failed");
|
||||
err = -ERANGE;
|
||||
goto unlock_mutex;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_memcpy((u8 *)data, (u8 *)queue->fbq.work_buffer +
|
||||
queue->fbq.read_position +
|
||||
sizeof(struct nv_falcon_fbq_msgq_hdr),
|
||||
size);
|
||||
|
||||
/* update current position */
|
||||
queue->fbq.read_position += size;
|
||||
|
||||
/* If reached end of this queue element, move on to next. */
|
||||
if (queue->fbq.read_position >= hdr->size) {
|
||||
queue->fbq.read_position = 0U;
|
||||
/* Increment queue index. */
|
||||
queue->position = engine_fb_queue_get_next(queue,
|
||||
queue->position);
|
||||
}
|
||||
|
||||
*bytes_read = size;
|
||||
|
||||
err = queue->tail(queue, &queue->position, QUEUE_SET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, position SET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
unlock_mutex:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
exit:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "falcon id-%d, queue id-%d, failed",
|
||||
queue->flcn_id, queue->id);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_engine_fb_queue_free(struct nvgpu_engine_fb_queue **queue_p)
|
||||
{
|
||||
struct nvgpu_engine_fb_queue *queue = NULL;
|
||||
struct gk20a *g;
|
||||
|
||||
if ((queue_p == NULL) || (*queue_p == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
queue = *queue_p;
|
||||
|
||||
g = queue->g;
|
||||
|
||||
nvgpu_log_info(g, "flcn id-%d q-id %d: index %d ",
|
||||
queue->flcn_id, queue->id, queue->index);
|
||||
|
||||
nvgpu_kfree(g, queue->fbq.work_buffer);
|
||||
nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex);
|
||||
|
||||
/* destroy mutex */
|
||||
nvgpu_mutex_destroy(&queue->mutex);
|
||||
|
||||
nvgpu_kfree(g, queue);
|
||||
*queue_p = NULL;
|
||||
}
|
||||
|
||||
int nvgpu_engine_fb_queue_init(struct nvgpu_engine_fb_queue **queue_p,
|
||||
struct nvgpu_engine_fb_queue_params params)
|
||||
{
|
||||
struct nvgpu_engine_fb_queue *queue = NULL;
|
||||
struct gk20a *g = params.g;
|
||||
int err = 0;
|
||||
|
||||
if (queue_p == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
queue = (struct nvgpu_engine_fb_queue *)
|
||||
nvgpu_kmalloc(g, sizeof(struct nvgpu_engine_fb_queue));
|
||||
|
||||
if (queue == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
queue->g = params.g;
|
||||
queue->flcn_id = params.flcn_id;
|
||||
queue->id = params.id;
|
||||
queue->index = params.index;
|
||||
queue->size = params.size;
|
||||
queue->oflag = params.oflag;
|
||||
|
||||
queue->fbq.tail = 0U;
|
||||
queue->fbq.element_in_use = 0U;
|
||||
queue->fbq.read_position = 0U;
|
||||
queue->fbq.super_surface_mem = params.super_surface_mem;
|
||||
queue->fbq.element_size = params.fbq_element_size;
|
||||
queue->fbq.fb_offset = params.fbq_offset;
|
||||
|
||||
queue->position = 0U;
|
||||
|
||||
queue->queue_head = params.queue_head;
|
||||
queue->queue_tail = params.queue_tail;
|
||||
|
||||
queue->head = engine_fb_queue_head;
|
||||
queue->tail = engine_fb_queue_tail;
|
||||
|
||||
/* init mutex */
|
||||
nvgpu_mutex_init(&queue->mutex);
|
||||
|
||||
/* init mutex */
|
||||
nvgpu_mutex_init(&queue->fbq.work_buffer_mutex);
|
||||
|
||||
queue->fbq.work_buffer = nvgpu_kzalloc(g, queue->fbq.element_size);
|
||||
if (queue->fbq.work_buffer == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto free_work_mutex;
|
||||
}
|
||||
|
||||
nvgpu_log_info(g,
|
||||
"flcn id-%d q-id %d: index %d, size 0x%08x",
|
||||
queue->flcn_id, queue->id, queue->index,
|
||||
queue->size);
|
||||
|
||||
*queue_p = queue;
|
||||
|
||||
return 0;
|
||||
|
||||
free_work_mutex:
|
||||
nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex);
|
||||
nvgpu_mutex_destroy(&queue->mutex);
|
||||
nvgpu_kfree(g, queue);
|
||||
|
||||
return err;
|
||||
}
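For reference, a rough caller-side sketch of filling nvgpu_engine_fb_queue_params and creating a write-side FB queue with the init routine above; the head/tail hooks and all numeric values are placeholders for illustration, not values from the driver:

/* Hypothetical HAL hooks that program the falcon queue head/tail registers. */
extern int example_queue_head(struct gk20a *g, u32 queue_id, u32 queue_index,
		u32 *head, bool set);
extern int example_queue_tail(struct gk20a *g, u32 queue_id, u32 queue_index,
		u32 *tail, bool set);

static int example_fbq_create(struct gk20a *g, struct nvgpu_mem *ss_mem,
		struct nvgpu_engine_fb_queue **fbq_out)
{
	struct nvgpu_engine_fb_queue_params params = {0};

	params.g = g;
	params.flcn_id = 0U;              /* placeholder falcon id */
	params.id = 0U;                   /* logical queue id */
	params.index = 0U;                /* physical queue index */
	params.size = 16U;                /* number of ring elements */
	params.oflag = OFLAG_WRITE;
	params.super_surface_mem = ss_mem;
	params.fbq_element_size = 0x100U; /* bytes per element (placeholder) */
	params.fbq_offset = 0U;           /* offset inside the super surface */
	params.queue_head = example_queue_head;
	params.queue_tail = example_queue_tail;

	return nvgpu_engine_fb_queue_init(fbq_out, params);
}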
|
||||
108
drivers/gpu/nvgpu/common/engine_queues/engine_fb_queue_priv.h
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ENGINE_FB_QUEUE_PRIV_H
|
||||
#define NVGPU_ENGINE_FB_QUEUE_PRIV_H
|
||||
|
||||
#include <nvgpu/lock.h>
|
||||
|
||||
struct nvgpu_engine_fb_queue {
|
||||
struct gk20a *g;
|
||||
u32 flcn_id;
|
||||
|
||||
/* used by nvgpu, for command LPQ/HPQ */
|
||||
struct nvgpu_mutex mutex;
|
||||
|
||||
/* current write position */
|
||||
u32 position;
|
||||
/* logical queue identifier */
|
||||
u32 id;
|
||||
/* physical queue index */
|
||||
u32 index;
|
||||
/* in bytes */
|
||||
u32 size;
|
||||
/* open-flag */
|
||||
u32 oflag;
|
||||
|
||||
/* members unique to the FB version of the falcon queues */
|
||||
struct {
|
||||
/* Holds super surface base address */
|
||||
struct nvgpu_mem *super_surface_mem;
|
||||
|
||||
/*
|
||||
* Holds the offset of queue data (0th element).
|
||||
* This is used for FB Queues to hold an offset of
|
||||
* Super Surface for this queue.
|
||||
*/
|
||||
u32 fb_offset;
|
||||
|
||||
/*
|
||||
* Define the size of a single queue element.
|
||||
* The size field above is used for the number of
|
||||
* queue elements.
|
||||
*/
|
||||
u32 element_size;
|
||||
|
||||
/* To keep track of elements in use */
|
||||
u64 element_in_use;
|
||||
|
||||
/*
|
||||
* Define a pointer to a local (SYSMEM) allocated
|
||||
* buffer to hold a single queue element
|
||||
* as it is being assembled.
|
||||
*/
|
||||
u8 *work_buffer;
|
||||
struct nvgpu_mutex work_buffer_mutex;
|
||||
|
||||
/*
|
||||
* Tracks how much of the current FB Queue MSG queue
|
||||
* entry has been read. This is needed as functions read
|
||||
* the MSG queue as a byte stream, rather
|
||||
* than reading a whole MSG at a time.
|
||||
*/
|
||||
u32 read_position;
|
||||
|
||||
/*
|
||||
* Tail as tracked on the nvgpu "side". Because the queue
|
||||
* elements and their associated payload (which is also moved
* PMU->nvgpu through the FB CMD Queue) can't be freed until
|
||||
* the command is complete, response is received and any "out"
|
||||
* payload delivered to the client, it is necessary for the
|
||||
* nvgpu to track its own version of "tail". This one is
|
||||
* incremented as commands and completed entries are found
|
||||
* following tail.
|
||||
*/
|
||||
u32 tail;
|
||||
} fbq;
|
||||
|
||||
/* engine and queue specific ops */
|
||||
int (*tail)(struct nvgpu_engine_fb_queue *queue, u32 *tail, bool set);
|
||||
int (*head)(struct nvgpu_engine_fb_queue *queue, u32 *head, bool set);
|
||||
|
||||
/* engine specific ops */
|
||||
int (*queue_head)(struct gk20a *g, u32 queue_id, u32 queue_index,
|
||||
u32 *head, bool set);
|
||||
int (*queue_tail)(struct gk20a *g, u32 queue_id, u32 queue_index,
|
||||
u32 *tail, bool set);
|
||||
};
|
||||
|
||||
#endif /* NVGPU_ENGINE_FB_QUEUE_PRIV_H */
|
||||
438
drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue.c
Normal file
@@ -0,0 +1,438 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/errno.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/flcnif_cmn.h>
|
||||
#include <nvgpu/pmu/pmuif/nvgpu_cmdif.h>
|
||||
#include <nvgpu/engine_queue.h>
|
||||
#include <nvgpu/engine_mem_queue.h>
|
||||
#include <nvgpu/pmu/cmd.h>
|
||||
|
||||
#include "engine_mem_queue_priv.h"
|
||||
#include "engine_dmem_queue.h"
|
||||
#include "engine_emem_queue.h"
|
||||
|
||||
static int mem_queue_get_head_tail(struct nvgpu_engine_mem_queue *queue,
|
||||
u32 *q_head, u32 *q_tail)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
err = queue->head(queue->g, queue->id, queue->index,
|
||||
q_head, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d, queue-%d, head GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = queue->tail(queue->g, queue->id, queue->index,
|
||||
q_tail, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d, queue-%d, tail GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
return 0;
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* common falcon queue ops */
|
||||
static bool engine_mem_queue_has_room(struct nvgpu_engine_mem_queue *queue,
|
||||
u32 size, bool *need_rewind)
|
||||
{
|
||||
u32 q_head = 0;
|
||||
u32 q_tail = 0;
|
||||
u32 q_free = 0;
|
||||
bool q_rewind = false;
|
||||
int err = 0;
|
||||
|
||||
size = NVGPU_ALIGN(size, QUEUE_ALIGNMENT);
|
||||
|
||||
err = mem_queue_get_head_tail(queue, &q_head, &q_tail);
|
||||
if (err != 0) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (q_head >= q_tail) {
|
||||
q_free = queue->offset + queue->size - q_head;
|
||||
q_free -= (u32)PMU_CMD_HDR_SIZE;
|
||||
|
||||
if (size > q_free) {
|
||||
q_rewind = true;
|
||||
q_head = queue->offset;
|
||||
}
|
||||
}
|
||||
|
||||
if (q_head < q_tail) {
|
||||
q_free = q_tail - q_head - 1U;
|
||||
}
|
||||
|
||||
if (need_rewind != NULL) {
|
||||
*need_rewind = q_rewind;
|
||||
}
|
||||
|
||||
exit:
|
||||
return size <= q_free;
|
||||
}
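As a worked example of the free-space computation above (all numbers below, including the PMU_CMD_HDR_SIZE value of 4, are assumed purely for illustration): when head is at or past tail the free space runs from head to the end of the queue minus one command header, and if the aligned write does not fit the queue is rewound so head restarts at the queue offset.

#include <assert.h>

int main(void)
{
	/* Illustrative values; PMU_CMD_HDR_SIZE is assumed to be 4 here. */
	unsigned int offset = 0x800U, size = 0x100U, hdr = 4U;
	unsigned int head = 0x8F0U, tail = 0x820U;
	unsigned int q_free;

	/* head >= tail: free space runs from head to the end of the queue */
	q_free = offset + size - head - hdr;
	assert(q_free == 0xCU);

	/* a 16-byte aligned write does not fit, so the queue is rewound */
	head = offset;
	q_free = tail - head - 1U;	/* now head < tail */
	assert(q_free == 0x1FU);	/* the 16-byte write fits after rewind */

	return 0;
}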
|
||||
|
||||
static int engine_mem_queue_rewind(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
struct gk20a *g = queue->g;
|
||||
struct pmu_cmd cmd;
|
||||
int err = 0;
|
||||
|
||||
if (queue->oflag == OFLAG_WRITE) {
|
||||
cmd.hdr.unit_id = PMU_UNIT_REWIND;
|
||||
cmd.hdr.size = (u8)PMU_CMD_HDR_SIZE;
|
||||
err = queue->push(flcn, queue, queue->position,
|
||||
&cmd, cmd.hdr.size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, rewind request failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
} else {
|
||||
queue->position += nvgpu_safe_cast_u32_to_u8(
|
||||
NVGPU_ALIGN(U32(cmd.hdr.size), QUEUE_ALIGNMENT));
|
||||
nvgpu_log_info(g, "flcn-%d queue-%d, rewinded",
|
||||
queue->flcn_id, queue->id);
|
||||
}
|
||||
}
|
||||
|
||||
/* update queue position */
|
||||
queue->position = queue->offset;
|
||||
|
||||
if (queue->oflag == OFLAG_READ) {
|
||||
err = queue->tail(g, queue->id, queue->index, &queue->position,
|
||||
QUEUE_SET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, position SET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int engine_mem_queue_prepare_write(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, u32 size)
|
||||
{
|
||||
bool q_rewind = false;
|
||||
int err = 0;
|
||||
|
||||
/* make sure there's enough free space for the write */
|
||||
if (!engine_mem_queue_has_room(queue, size, &q_rewind)) {
|
||||
nvgpu_log_info(queue->g, "queue full: queue-id %d: index %d",
|
||||
queue->id, queue->index);
|
||||
err = -EAGAIN;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
err = queue->head(queue->g, queue->id, queue->index,
|
||||
&queue->position, QUEUE_GET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(queue->g, "flcn-%d queue-%d, position GET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (q_rewind) {
|
||||
err = engine_mem_queue_rewind(flcn, queue);
|
||||
}
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* queue public functions */
|
||||
|
||||
/* queue push operation with lock */
|
||||
int nvgpu_engine_mem_queue_push(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, void *data, u32 size)
|
||||
{
|
||||
struct gk20a *g;
|
||||
int err = 0;
|
||||
|
||||
if ((flcn == NULL) || (queue == NULL)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = queue->g;
|
||||
|
||||
if (queue->oflag != OFLAG_WRITE) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d not opened for write",
|
||||
queue->flcn_id, queue->id);
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = engine_mem_queue_prepare_write(flcn, queue, size);
|
||||
if (err != 0) {
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
err = queue->push(flcn, queue, queue->position, data, size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, fail to write",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
queue->position += NVGPU_ALIGN(size, QUEUE_ALIGNMENT);
|
||||
|
||||
err = queue->head(g, queue->id, queue->index,
|
||||
&queue->position, QUEUE_SET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, position SET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
}
|
||||
|
||||
unlock_mutex:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* queue pop operation with lock */
|
||||
int nvgpu_engine_mem_queue_pop(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue, void *data, u32 size,
|
||||
u32 *bytes_read)
|
||||
{
|
||||
struct gk20a *g;
|
||||
u32 q_tail = 0;
|
||||
u32 q_head = 0;
|
||||
u32 used = 0;
|
||||
int err = 0;
|
||||
|
||||
*bytes_read = 0;
|
||||
|
||||
if ((flcn == NULL) || (queue == NULL)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = queue->g;
|
||||
|
||||
if (queue->oflag != OFLAG_READ) {
|
||||
nvgpu_err(g, "flcn-%d, queue-%d, not opened for read",
|
||||
queue->flcn_id, queue->id);
|
||||
err = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = mem_queue_get_head_tail(queue, &q_head, &queue->position);
|
||||
if (err != 0) {
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
q_tail = queue->position;
|
||||
|
||||
if (q_head == q_tail) {
|
||||
goto unlock_mutex;
|
||||
} else if (q_head > q_tail) {
|
||||
used = q_head - q_tail;
|
||||
} else {
|
||||
used = queue->offset + queue->size - q_tail;
|
||||
}
|
||||
|
||||
if (size > used) {
|
||||
nvgpu_warn(g, "queue size smaller than request read");
|
||||
size = used;
|
||||
}
|
||||
|
||||
err = queue->pop(flcn, queue, q_tail, data, size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, fail to read",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
queue->position += NVGPU_ALIGN(size, QUEUE_ALIGNMENT);
|
||||
|
||||
err = queue->tail(g, queue->id, queue->index,
|
||||
&queue->position, QUEUE_SET);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, position SET failed",
|
||||
queue->flcn_id, queue->id);
|
||||
goto unlock_mutex;
|
||||
}
|
||||
|
||||
*bytes_read = size;
|
||||
|
||||
unlock_mutex:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
exit:
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_engine_mem_queue_rewind(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if ((flcn == NULL) || (queue == NULL)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = engine_mem_queue_rewind(flcn, queue);
|
||||
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* queue is_empty check with lock */
|
||||
bool nvgpu_engine_mem_queue_is_empty(struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
u32 q_head = 0;
|
||||
u32 q_tail = 0;
|
||||
int err = 0;
|
||||
|
||||
if (queue == NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* acquire mutex */
|
||||
nvgpu_mutex_acquire(&queue->mutex);
|
||||
|
||||
err = mem_queue_get_head_tail(queue, &q_head, &q_tail);
|
||||
if (err != 0) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
/* release mutex */
|
||||
nvgpu_mutex_release(&queue->mutex);
|
||||
|
||||
return q_head == q_tail;
|
||||
}
|
||||
|
||||
void nvgpu_engine_mem_queue_free(struct nvgpu_engine_mem_queue **queue_p)
|
||||
{
|
||||
struct nvgpu_engine_mem_queue *queue = NULL;
|
||||
struct gk20a *g;
|
||||
|
||||
if ((queue_p == NULL) || (*queue_p == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
queue = *queue_p;
|
||||
|
||||
g = queue->g;
|
||||
|
||||
nvgpu_log_info(g, "flcn id-%d q-id %d: index %d ",
|
||||
queue->flcn_id, queue->id, queue->index);
|
||||
|
||||
/* destroy mutex */
|
||||
nvgpu_mutex_destroy(&queue->mutex);
|
||||
|
||||
nvgpu_kfree(g, queue);
|
||||
*queue_p = NULL;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_mem_queue_get_size(struct nvgpu_engine_mem_queue *queue)
|
||||
{
|
||||
return queue->size;
|
||||
}
|
||||
|
||||
int nvgpu_engine_mem_queue_init(struct nvgpu_engine_mem_queue **queue_p,
|
||||
struct nvgpu_engine_mem_queue_params params)
|
||||
{
|
||||
struct nvgpu_engine_mem_queue *queue = NULL;
|
||||
struct gk20a *g = params.g;
|
||||
int err = 0;
|
||||
|
||||
if (queue_p == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
queue = (struct nvgpu_engine_mem_queue *)
|
||||
nvgpu_kmalloc(g, sizeof(struct nvgpu_engine_mem_queue));
|
||||
|
||||
if (queue == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
queue->g = params.g;
|
||||
queue->flcn_id = params.flcn_id;
|
||||
queue->id = params.id;
|
||||
queue->index = params.index;
|
||||
queue->offset = params.offset;
|
||||
queue->position = params.position;
|
||||
queue->size = params.size;
|
||||
queue->oflag = params.oflag;
|
||||
queue->queue_type = params.queue_type;
|
||||
|
||||
queue->head = params.queue_head;
|
||||
queue->tail = params.queue_tail;
|
||||
|
||||
nvgpu_log_info(g,
|
||||
"flcn id-%d q-id %d: index %d, offset 0x%08x, size 0x%08x",
|
||||
queue->flcn_id, queue->id, queue->index,
|
||||
queue->offset, queue->size);
|
||||
|
||||
switch (queue->queue_type) {
|
||||
case QUEUE_TYPE_DMEM:
|
||||
engine_dmem_queue_init(queue);
|
||||
break;
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
case QUEUE_TYPE_EMEM:
|
||||
engine_emem_queue_init(queue);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "flcn-%d queue-%d, init failed",
|
||||
queue->flcn_id, queue->id);
|
||||
nvgpu_kfree(g, queue);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* init mutex */
|
||||
nvgpu_mutex_init(&queue->mutex);
|
||||
|
||||
*queue_p = queue;
|
||||
exit:
|
||||
return err;
|
||||
}
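A rough caller-side sketch of creating a DMEM-backed command queue and pushing one command through the common layer above; the head/tail hooks and numeric values are placeholders for illustration rather than values from the driver:

/* Hypothetical engine hooks for the falcon queue head/tail registers. */
extern int example_mem_queue_head(struct gk20a *g, u32 queue_id,
		u32 queue_index, u32 *head, bool set);
extern int example_mem_queue_tail(struct gk20a *g, u32 queue_id,
		u32 queue_index, u32 *tail, bool set);

static int example_dmem_queue_push(struct gk20a *g, struct nvgpu_falcon *flcn,
		void *cmd, u32 cmd_size)
{
	struct nvgpu_engine_mem_queue *queue = NULL;
	struct nvgpu_engine_mem_queue_params params = {0};
	int err;

	params.g = g;
	params.flcn_id = nvgpu_falcon_get_id(flcn);
	params.id = 0U;               /* logical queue id (placeholder) */
	params.index = 0U;            /* physical queue index (placeholder) */
	params.offset = 0x800U;       /* DMEM offset (placeholder) */
	params.size = 0x100U;         /* queue size in bytes (placeholder) */
	params.oflag = OFLAG_WRITE;
	params.queue_type = QUEUE_TYPE_DMEM;
	params.queue_head = example_mem_queue_head;
	params.queue_tail = example_mem_queue_tail;

	err = nvgpu_engine_mem_queue_init(&queue, params);
	if (err != 0) {
		return err;
	}

	/* the DMEM push op copies the command into falcon DMEM */
	return nvgpu_engine_mem_queue_push(flcn, queue, cmd, cmd_size);
}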
|
||||
70
drivers/gpu/nvgpu/common/engine_queues/engine_mem_queue_priv.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_ENGINE_MEM_QUEUE_PRIV_H
|
||||
#define NVGPU_ENGINE_MEM_QUEUE_PRIV_H
|
||||
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
struct nvgpu_falcon;
|
||||
|
||||
struct nvgpu_engine_mem_queue {
|
||||
struct gk20a *g;
|
||||
u32 flcn_id;
|
||||
|
||||
/* Queue Type (queue_type) */
|
||||
u8 queue_type;
|
||||
|
||||
/* used by nvgpu, for command LPQ/HPQ */
|
||||
struct nvgpu_mutex mutex;
|
||||
|
||||
/* current write position */
|
||||
u32 position;
|
||||
/* physical dmem offset where this queue begins */
|
||||
u32 offset;
|
||||
/* logical queue identifier */
|
||||
u32 id;
|
||||
/* physical queue index */
|
||||
u32 index;
|
||||
/* in bytes */
|
||||
u32 size;
|
||||
/* open-flag */
|
||||
u32 oflag;
|
||||
|
||||
/* queue type(DMEM-Q/EMEM-Q) specific ops */
|
||||
int (*push)(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue,
|
||||
u32 dst, void *data, u32 size);
|
||||
int (*pop)(struct nvgpu_falcon *flcn,
|
||||
struct nvgpu_engine_mem_queue *queue,
|
||||
u32 src, void *data, u32 size);
|
||||
|
||||
/* engine specific ops */
|
||||
int (*head)(struct gk20a *g, u32 queue_id, u32 queue_index,
|
||||
u32 *head, bool set);
|
||||
int (*tail)(struct gk20a *g, u32 queue_id, u32 queue_index,
|
||||
u32 *tail, bool set);
|
||||
};
|
||||
|
||||
#endif /* NVGPU_ENGINE_MEM_QUEUE_PRIV_H */
|
||||
825
drivers/gpu/nvgpu/common/falcon/falcon.c
Normal file
@@ -0,0 +1,825 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include "falcon_sw_gk20a.h"
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
#include "falcon_sw_tu104.h"
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT) && defined(CONFIG_NVGPU_NON_FUSA)
|
||||
#include "nvgpu_next_gpuid.h"
|
||||
#endif
|
||||
|
||||
static bool is_falcon_valid(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
if (flcn == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!flcn->is_falcon_supported) {
|
||||
nvgpu_err(flcn->g, "Falcon %d not supported", flcn->flcn_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
u32 nvgpu_falcon_readl(struct nvgpu_falcon *flcn, u32 offset)
|
||||
{
|
||||
return nvgpu_readl(flcn->g,
|
||||
nvgpu_safe_add_u32(flcn->flcn_base, offset));
|
||||
}
|
||||
|
||||
void nvgpu_falcon_writel(struct nvgpu_falcon *flcn,
|
||||
u32 offset, u32 val)
|
||||
{
|
||||
nvgpu_writel(flcn->g, nvgpu_safe_add_u32(flcn->flcn_base, offset), val);
|
||||
}
|
||||
|
||||
int nvgpu_falcon_reset(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct gk20a *g;
|
||||
int status = 0;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (flcn->flcn_engine_dep_ops.reset_eng != NULL) {
|
||||
/* falcon & engine reset */
|
||||
status = flcn->flcn_engine_dep_ops.reset_eng(g);
|
||||
} else {
|
||||
g->ops.falcon.reset(flcn);
|
||||
}
|
||||
|
||||
if (status == 0) {
|
||||
status = nvgpu_falcon_mem_scrub_wait(flcn);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_wait_for_halt(struct nvgpu_falcon *flcn, unsigned int timeout)
|
||||
{
|
||||
struct nvgpu_timeout to;
|
||||
struct gk20a *g;
|
||||
int status;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
status = nvgpu_timeout_init(g, &to, timeout, NVGPU_TIMER_CPU_TIMER);
|
||||
if (status != 0) {
|
||||
return status;
|
||||
}
|
||||
|
||||
do {
|
||||
if (g->ops.falcon.is_falcon_cpu_halted(flcn)) {
|
||||
break;
|
||||
}
|
||||
|
||||
nvgpu_udelay(10);
|
||||
} while (nvgpu_timeout_expired(&to) == 0);
|
||||
|
||||
if (nvgpu_timeout_peek_expired(&to)) {
|
||||
status = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_wait_idle(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct nvgpu_timeout timeout;
|
||||
struct gk20a *g;
|
||||
int status;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
status = nvgpu_timeout_init(g, &timeout, 2000, NVGPU_TIMER_RETRY_TIMER);
|
||||
if (status != 0) {
|
||||
return status;
|
||||
}
|
||||
|
||||
/* wait for falcon idle */
|
||||
do {
|
||||
if (g->ops.falcon.is_falcon_idle(flcn)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (nvgpu_timeout_expired_msg(&timeout,
|
||||
"waiting for falcon idle") != 0) {
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
nvgpu_usleep_range(100, 200);
|
||||
} while (true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_mem_scrub_wait(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct nvgpu_timeout timeout;
|
||||
/**
|
||||
* Delay depends on memory size and pwr_clk
|
||||
* delay = (MAX {IMEM_SIZE, DMEM_SIZE} * 64 + 1) / pwr_clk
|
||||
* Timeout set is 1msec & status check at interval 10usec
|
||||
*/
|
||||
const u32 mem_scrubbing_max_timeout = 1000U;
|
||||
const u32 mem_scrubbing_default_timeout = 10U;
|
||||
struct gk20a *g;
|
||||
int status;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
/* check IMEM/DMEM scrubbing complete status */
|
||||
status = nvgpu_timeout_init(g, &timeout,
|
||||
mem_scrubbing_max_timeout /
|
||||
mem_scrubbing_default_timeout,
|
||||
NVGPU_TIMER_RETRY_TIMER);
|
||||
if (status != 0) {
|
||||
return status;
|
||||
}
|
||||
|
||||
do {
|
||||
if (g->ops.falcon.is_falcon_scrubbing_done(flcn)) {
|
||||
break;
|
||||
}
|
||||
nvgpu_udelay(mem_scrubbing_default_timeout);
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
if (nvgpu_timeout_peek_expired(&timeout)) {
|
||||
status = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static int falcon_memcpy_params_check(struct nvgpu_falcon *flcn,
|
||||
u32 offset, u32 size, enum falcon_mem_type mem_type, u8 port)
|
||||
{
|
||||
struct gk20a *g = flcn->g;
|
||||
u32 mem_size = 0;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (size == 0U) {
|
||||
nvgpu_err(g, "size is zero");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if ((offset & 0x3U) != 0U) {
|
||||
nvgpu_err(g, "offset (0x%08x) not 4-byte aligned", offset);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (port >= g->ops.falcon.get_ports_count(flcn, mem_type)) {
|
||||
nvgpu_err(g, "invalid port %u", (u32) port);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
mem_size = g->ops.falcon.get_mem_size(flcn, mem_type);
|
||||
|
||||
if (!((offset < mem_size) && ((offset + size) <= mem_size))) {
|
||||
nvgpu_err(g, "flcn-id 0x%x, copy overflow ",
|
||||
flcn->flcn_id);
|
||||
nvgpu_err(g, "total size 0x%x, offset 0x%x, copy size 0x%x",
|
||||
mem_size, offset, size);
|
||||
ret = -EINVAL;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_copy_to_dmem(struct nvgpu_falcon *flcn,
|
||||
u32 dst, u8 *src, u32 size, u8 port)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (falcon_memcpy_params_check(flcn, dst, size, MEM_DMEM, port) != 0) {
|
||||
nvgpu_err(g, "incorrect parameters");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&flcn->dmem_lock);
|
||||
status = g->ops.falcon.copy_to_dmem(flcn, dst, src, size, port);
|
||||
nvgpu_mutex_release(&flcn->dmem_lock);
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_copy_to_imem(struct nvgpu_falcon *flcn,
|
||||
u32 dst, u8 *src, u32 size, u8 port, bool sec, u32 tag)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (falcon_memcpy_params_check(flcn, dst, size, MEM_IMEM, port) != 0) {
|
||||
nvgpu_err(g, "incorrect parameters");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&flcn->imem_lock);
|
||||
status = g->ops.falcon.copy_to_imem(flcn, dst, src,
|
||||
size, port, sec, tag);
|
||||
nvgpu_mutex_release(&flcn->imem_lock);
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
|
||||
u32 nvgpu_falcon_mailbox_read(struct nvgpu_falcon *flcn, u32 mailbox_index)
|
||||
{
|
||||
struct gk20a *g;
|
||||
u32 data = 0;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (mailbox_index >= FALCON_MAILBOX_COUNT) {
|
||||
nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
data = g->ops.falcon.mailbox_read(flcn, mailbox_index);
|
||||
|
||||
exit:
|
||||
return data;
|
||||
}
|
||||
|
||||
void nvgpu_falcon_mailbox_write(struct nvgpu_falcon *flcn, u32 mailbox_index,
|
||||
u32 data)
|
||||
{
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (mailbox_index >= FALCON_MAILBOX_COUNT) {
|
||||
nvgpu_err(g, "incorrect mailbox id %d", mailbox_index);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
g->ops.falcon.mailbox_write(flcn, mailbox_index, data);
|
||||
|
||||
exit:
|
||||
return;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_hs_ucode_load_bootstrap(struct nvgpu_falcon *flcn, u32 *ucode,
|
||||
u32 *ucode_header)
|
||||
{
|
||||
struct gk20a *g;
|
||||
u32 sec_imem_dest = 0U;
|
||||
int err = 0;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
/* falcon reset */
|
||||
err = nvgpu_falcon_reset(flcn);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "nvgpu_falcon_reset() failed err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* setup falcon apertures, boot-config */
|
||||
if (flcn->flcn_engine_dep_ops.setup_bootstrap_config != NULL) {
|
||||
flcn->flcn_engine_dep_ops.setup_bootstrap_config(flcn->g);
|
||||
}
|
||||
|
||||
/* Copy Non Secure IMEM code */
|
||||
err = nvgpu_falcon_copy_to_imem(flcn, 0U,
|
||||
(u8 *)&ucode[ucode_header[OS_CODE_OFFSET] >> 2U],
|
||||
ucode_header[OS_CODE_SIZE], 0U, false,
|
||||
GET_IMEM_TAG(ucode_header[OS_CODE_OFFSET]));
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "HS ucode non-secure code to IMEM failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Put secure code after non-secure block */
|
||||
sec_imem_dest = GET_NEXT_BLOCK(ucode_header[OS_CODE_SIZE]);
|
||||
|
||||
err = nvgpu_falcon_copy_to_imem(flcn, sec_imem_dest,
|
||||
(u8 *)&ucode[ucode_header[APP_0_CODE_OFFSET] >> 2U],
|
||||
ucode_header[APP_0_CODE_SIZE], 0U, true,
|
||||
GET_IMEM_TAG(ucode_header[APP_0_CODE_OFFSET]));
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "HS ucode secure code to IMEM failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* load DMEM: ensure that signatures are patched */
|
||||
err = nvgpu_falcon_copy_to_dmem(flcn, 0U, (u8 *)&ucode[
|
||||
ucode_header[OS_DATA_OFFSET] >> 2U],
|
||||
ucode_header[OS_DATA_SIZE], 0U);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "HS ucode data copy to DMEM failed");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* Write non-zero value to mailbox register which is updated by
|
||||
* HS bin to denote its return status.
|
||||
*/
|
||||
nvgpu_falcon_mailbox_write(flcn, FALCON_MAILBOX_0, 0xdeadbeefU);
|
||||
|
||||
/* set BOOTVEC to start of non-secure code */
|
||||
g->ops.falcon.bootstrap(flcn, 0U);
|
||||
|
||||
exit:
|
||||
return err;
|
||||
}
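A hedged sketch of how a caller might drive the bootstrap helper above: load and start the HS binary, wait for the falcon to halt, then check the status the HS code leaves in mailbox 0. The timeout value and the convention that a non-zero mailbox value (including an untouched 0xdeadbeef marker) means failure are assumptions for illustration.

static int example_run_hs_ucode(struct gk20a *g, u32 flcn_id,
		u32 *ucode, u32 *ucode_header)
{
	struct nvgpu_falcon *flcn = nvgpu_falcon_get_instance(g, flcn_id);
	u32 status;
	int err;

	if (flcn == NULL) {
		return -ENODEV;
	}

	err = nvgpu_falcon_hs_ucode_load_bootstrap(flcn, ucode, ucode_header);
	if (err != 0) {
		return err;
	}

	/* wait up to 1000 ms (placeholder timeout) for the HS binary to halt */
	err = nvgpu_falcon_wait_for_halt(flcn, 1000U);
	if (err != 0) {
		return err;
	}

	/* assumed convention: the HS binary writes 0 to mailbox 0 on success */
	status = nvgpu_falcon_mailbox_read(flcn, FALCON_MAILBOX_0);
	if (status != 0U) {
		return -EIO;
	}

	return 0;
}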
|
||||
|
||||
u32 nvgpu_falcon_get_id(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
return flcn->flcn_id;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
bool nvgpu_falcon_is_falcon2_enabled(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
return flcn->is_falcon2_enabled ? true : false;
|
||||
}
|
||||
|
||||
bool nvgpu_falcon_is_feature_supported(struct nvgpu_falcon *flcn,
|
||||
u32 feature)
|
||||
{
|
||||
return nvgpu_test_bit(feature, (void *)&flcn->fuse_settings);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct nvgpu_falcon *nvgpu_falcon_get_instance(struct gk20a *g, u32 flcn_id)
|
||||
{
|
||||
struct nvgpu_falcon *flcn = NULL;
|
||||
|
||||
switch (flcn_id) {
|
||||
case FALCON_ID_PMU:
|
||||
flcn = &g->pmu_flcn;
|
||||
break;
|
||||
case FALCON_ID_FECS:
|
||||
flcn = &g->fecs_flcn;
|
||||
break;
|
||||
case FALCON_ID_GPCCS:
|
||||
flcn = &g->gpccs_flcn;
|
||||
break;
|
||||
case FALCON_ID_GSPLITE:
|
||||
flcn = &g->gsp_flcn;
|
||||
break;
|
||||
case FALCON_ID_NVDEC:
|
||||
flcn = &g->nvdec_flcn;
|
||||
break;
|
||||
case FALCON_ID_SEC2:
|
||||
flcn = &g->sec2.flcn;
|
||||
break;
|
||||
case FALCON_ID_MINION:
|
||||
flcn = &g->minion_flcn;
|
||||
break;
|
||||
default:
|
||||
nvgpu_err(g, "Invalid/Unsupported falcon ID %x", flcn_id);
|
||||
break;
|
||||
};
|
||||
|
||||
return flcn;
|
||||
}
|
||||
|
||||
static int falcon_sw_chip_init(struct gk20a *g, struct nvgpu_falcon *flcn)
|
||||
{
|
||||
u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl);
|
||||
int err = 0;
|
||||
|
||||
switch (ver) {
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
case GK20A_GPUID_GM20B:
|
||||
case GK20A_GPUID_GM20B_B:
|
||||
gk20a_falcon_sw_init(flcn);
|
||||
break;
|
||||
case NVGPU_GPUID_GP10B:
|
||||
gk20a_falcon_sw_init(flcn);
|
||||
break;
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
case NVGPU_NEXT_GPUID:
|
||||
nvgpu_next_falcon_sw_init(flcn);
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
case NVGPU_GPUID_GV11B:
|
||||
gk20a_falcon_sw_init(flcn);
|
||||
break;
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
case NVGPU_GPUID_TU104:
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
case NVGPU_NEXT_DGPU_GPUID:
|
||||
#endif
|
||||
tu104_falcon_sw_init(flcn);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
err = -EINVAL;
|
||||
nvgpu_err(g, "no support for GPUID %x", ver);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_sw_init(struct gk20a *g, u32 flcn_id)
|
||||
{
|
||||
struct nvgpu_falcon *flcn = NULL;
|
||||
int err = 0;
|
||||
|
||||
flcn = nvgpu_falcon_get_instance(g, flcn_id);
|
||||
if (flcn == NULL) {
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
flcn->flcn_id = flcn_id;
|
||||
flcn->g = g;
|
||||
|
||||
/* call SW init methods to assign flcn base & support of a falcon */
|
||||
err = falcon_sw_chip_init(g, flcn);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "Chip specific falcon sw init failed %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&flcn->imem_lock);
|
||||
nvgpu_mutex_init(&flcn->dmem_lock);
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (flcn->emem_supported) {
|
||||
nvgpu_mutex_init(&flcn->emem_lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_falcon_sw_free(struct gk20a *g, u32 flcn_id)
|
||||
{
|
||||
struct nvgpu_falcon *flcn = NULL;
|
||||
|
||||
flcn = nvgpu_falcon_get_instance(g, flcn_id);
|
||||
if (flcn == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (flcn->is_falcon_supported) {
|
||||
flcn->is_falcon_supported = false;
|
||||
} else {
|
||||
nvgpu_log_info(g, "falcon 0x%x not supported on %s",
|
||||
flcn->flcn_id, g->name);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (flcn->emem_supported) {
|
||||
nvgpu_mutex_destroy(&flcn->emem_lock);
|
||||
}
|
||||
#endif
|
||||
nvgpu_mutex_destroy(&flcn->dmem_lock);
|
||||
nvgpu_mutex_destroy(&flcn->imem_lock);
|
||||
}
|
||||
|
||||
void nvgpu_falcon_set_irq(struct nvgpu_falcon *flcn, bool enable,
|
||||
u32 intr_mask, u32 intr_dest)
|
||||
{
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (!flcn->is_interrupt_enabled) {
|
||||
nvgpu_warn(g, "Interrupt not supported on flcn 0x%x ",
|
||||
flcn->flcn_id);
|
||||
return;
|
||||
}
|
||||
|
||||
g->ops.falcon.set_irq(flcn, enable, intr_mask, intr_dest);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
int nvgpu_falcon_copy_from_emem(struct nvgpu_falcon *flcn,
|
||||
u32 src, u8 *dst, u32 size, u8 port)
|
||||
{
|
||||
struct nvgpu_falcon_engine_dependency_ops *flcn_dops;
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
flcn_dops = &flcn->flcn_engine_dep_ops;
|
||||
|
||||
if (flcn_dops->copy_from_emem != NULL) {
|
||||
nvgpu_mutex_acquire(&flcn->emem_lock);
|
||||
status = flcn_dops->copy_from_emem(g, src, dst, size, port);
|
||||
nvgpu_mutex_release(&flcn->emem_lock);
|
||||
} else {
|
||||
nvgpu_warn(g, "Invalid op on falcon 0x%x ",
|
||||
flcn->flcn_id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_copy_to_emem(struct nvgpu_falcon *flcn,
|
||||
u32 dst, u8 *src, u32 size, u8 port)
|
||||
{
|
||||
struct nvgpu_falcon_engine_dependency_ops *flcn_dops;
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
flcn_dops = &flcn->flcn_engine_dep_ops;
|
||||
|
||||
if (flcn_dops->copy_to_emem != NULL) {
|
||||
nvgpu_mutex_acquire(&flcn->emem_lock);
|
||||
status = flcn_dops->copy_to_emem(g, dst, src, size, port);
|
||||
nvgpu_mutex_release(&flcn->emem_lock);
|
||||
} else {
|
||||
nvgpu_warn(g, "Invalid op on falcon 0x%x ",
|
||||
flcn->flcn_id);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_FALCON_DEBUG
|
||||
void nvgpu_falcon_dump_stats(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
flcn->g->ops.falcon.dump_falcon_stats(flcn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_FALCON_NON_FUSA
|
||||
int nvgpu_falcon_bootstrap(struct nvgpu_falcon *flcn, u32 boot_vector)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
flcn->g->ops.falcon.bootstrap(flcn, boot_vector);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_get_mem_size(struct nvgpu_falcon *flcn,
|
||||
enum falcon_mem_type type, u32 *size)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*size = flcn->g->ops.falcon.get_mem_size(flcn, type);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_clear_halt_intr_status(struct nvgpu_falcon *flcn,
|
||||
unsigned int timeout)
|
||||
{
|
||||
struct nvgpu_timeout to;
|
||||
struct gk20a *g;
|
||||
int status;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
status = nvgpu_timeout_init(g, &to, timeout, NVGPU_TIMER_CPU_TIMER);
|
||||
if (status != 0) {
|
||||
return status;
|
||||
}
|
||||
|
||||
do {
|
||||
if (g->ops.falcon.clear_halt_interrupt_status(flcn)) {
|
||||
break;
|
||||
}
|
||||
|
||||
nvgpu_udelay(1);
|
||||
} while (nvgpu_timeout_expired(&to) == 0);
|
||||
|
||||
if (nvgpu_timeout_peek_expired(&to)) {
|
||||
status = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_copy_from_dmem(struct nvgpu_falcon *flcn,
|
||||
u32 src, u8 *dst, u32 size, u8 port)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (falcon_memcpy_params_check(flcn, src, size, MEM_DMEM, port) != 0) {
|
||||
nvgpu_err(g, "incorrect parameters");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&flcn->dmem_lock);
|
||||
status = g->ops.falcon.copy_from_dmem(flcn, src, dst, size, port);
|
||||
nvgpu_mutex_release(&flcn->dmem_lock);
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
|
||||
int nvgpu_falcon_copy_from_imem(struct nvgpu_falcon *flcn,
|
||||
u32 src, u8 *dst, u32 size, u8 port)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct gk20a *g;
|
||||
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (falcon_memcpy_params_check(flcn, src, size, MEM_IMEM, port) != 0) {
|
||||
nvgpu_err(g, "incorrect parameters");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&flcn->imem_lock);
|
||||
status = g->ops.falcon.copy_from_imem(flcn, src, dst, size, port);
|
||||
nvgpu_mutex_release(&flcn->imem_lock);
|
||||
|
||||
exit:
|
||||
return status;
|
||||
}
|
||||
|
||||
static void falcon_print_mem(struct nvgpu_falcon *flcn, u32 src,
|
||||
u32 size, enum falcon_mem_type mem_type)
|
||||
{
|
||||
u32 buff[64] = {0};
|
||||
u32 total_block_read = 0;
|
||||
u32 byte_read_count = 0;
|
||||
struct gk20a *g;
|
||||
u32 i = 0;
|
||||
int status = 0;
|
||||
|
||||
g = flcn->g;
|
||||
|
||||
if (falcon_memcpy_params_check(flcn, src, size, mem_type, 0) != 0) {
|
||||
nvgpu_err(g, "incorrect parameters");
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_info(g, " offset 0x%x size %d bytes", src, size);
|
||||
|
||||
total_block_read = size >> 8;
|
||||
do {
|
||||
byte_read_count =
|
||||
(total_block_read != 0U) ? (u32)sizeof(buff) : size;
|
||||
|
||||
if (byte_read_count == 0U) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (mem_type == MEM_DMEM) {
|
||||
status = nvgpu_falcon_copy_from_dmem(flcn, src,
|
||||
(u8 *)buff, byte_read_count, 0);
|
||||
} else {
|
||||
status = nvgpu_falcon_copy_from_imem(flcn, src,
|
||||
(u8 *)buff, byte_read_count, 0);
|
||||
}
|
||||
|
||||
if (status != 0) {
|
||||
nvgpu_err(g, "MEM print failed");
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0U; i < (byte_read_count >> 2U); i += 4U) {
|
||||
nvgpu_info(g, "0x%04x: 0x%08x 0x%08x 0x%08x 0x%08x",
|
||||
src + (i << 2U), buff[i], buff[i+1U],
|
||||
buff[i+2U], buff[i+3U]);
|
||||
}
|
||||
|
||||
src += byte_read_count;
|
||||
size -= byte_read_count;
|
||||
} while (total_block_read-- != 0U);
|
||||
}
|
||||
|
||||
void nvgpu_falcon_print_dmem(struct nvgpu_falcon *flcn, u32 src, u32 size)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_info(flcn->g, " PRINT DMEM ");
|
||||
falcon_print_mem(flcn, src, size, MEM_DMEM);
|
||||
}
|
||||
|
||||
void nvgpu_falcon_print_imem(struct nvgpu_falcon *flcn, u32 src, u32 size)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_info(flcn->g, " PRINT IMEM ");
|
||||
falcon_print_mem(flcn, src, size, MEM_IMEM);
|
||||
}
|
||||
|
||||
void nvgpu_falcon_get_ctls(struct nvgpu_falcon *flcn, u32 *sctl, u32 *cpuctl)
|
||||
{
|
||||
if (!is_falcon_valid(flcn)) {
|
||||
return;
|
||||
}
|
||||
|
||||
flcn->g->ops.falcon.get_falcon_ctls(flcn, sctl, cpuctl);
|
||||
}
|
||||
#endif
|
||||
80
drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.c
Normal file
80
drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
|
||||
#include "falcon_sw_gk20a.h"
|
||||
|
||||
void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct gk20a *g = flcn->g;
|
||||
struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops =
|
||||
&flcn->flcn_engine_dep_ops;
|
||||
|
||||
switch (flcn->flcn_id) {
|
||||
case FALCON_ID_PMU:
|
||||
flcn_eng_dep_ops->reset_eng = g->ops.pmu.pmu_reset;
|
||||
flcn_eng_dep_ops->setup_bootstrap_config =
|
||||
g->ops.pmu.flcn_setup_boot_config;
|
||||
break;
|
||||
default:
|
||||
/* NULL assignment make sure
|
||||
* CPU hard reset in gk20a_falcon_reset() gets execute
|
||||
* if falcon doesn't need specific reset implementation
|
||||
*/
|
||||
flcn_eng_dep_ops->reset_eng = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void gk20a_falcon_sw_init(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct gk20a *g = flcn->g;
|
||||
|
||||
switch (flcn->flcn_id) {
|
||||
case FALCON_ID_PMU:
|
||||
flcn->flcn_base = g->ops.pmu.falcon_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = true;
|
||||
break;
|
||||
case FALCON_ID_FECS:
|
||||
flcn->flcn_base = g->ops.gr.falcon.fecs_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = false;
|
||||
break;
|
||||
case FALCON_ID_GPCCS:
|
||||
flcn->flcn_base = g->ops.gr.falcon.gpccs_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = false;
|
||||
break;
|
||||
default:
|
||||
flcn->is_falcon_supported = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (flcn->is_falcon_supported) {
|
||||
gk20a_falcon_engine_dependency_ops(flcn);
|
||||
} else {
|
||||
nvgpu_log_info(g, "falcon 0x%x not supported on %s",
|
||||
flcn->flcn_id, g->name);
|
||||
}
|
||||
}
|
||||
28
drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.h
Normal file
28
drivers/gpu/nvgpu/common/falcon/falcon_sw_gk20a.h
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef NVGPU_FALCON_SW_GK20A_H
|
||||
#define NVGPU_FALCON_SW_GK20A_H
|
||||
|
||||
void gk20a_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn);
|
||||
void gk20a_falcon_sw_init(struct nvgpu_falcon *flcn);
|
||||
|
||||
#endif /* NVGPU_FALCON_SW_GK20A_H */
|
||||
102
drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.c
Normal file
102
drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.c
Normal file
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/falcon.h>
|
||||
|
||||
#include "falcon_sw_gk20a.h"
|
||||
#include "falcon_sw_tu104.h"
|
||||
|
||||
void tu104_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct nvgpu_falcon_engine_dependency_ops *flcn_eng_dep_ops =
|
||||
&flcn->flcn_engine_dep_ops;
|
||||
struct gk20a *g = flcn->g;
|
||||
|
||||
gk20a_falcon_engine_dependency_ops(flcn);
|
||||
|
||||
switch (flcn->flcn_id) {
|
||||
case FALCON_ID_GSPLITE:
|
||||
flcn_eng_dep_ops->reset_eng = g->ops.gsp.gsp_reset;
|
||||
flcn_eng_dep_ops->setup_bootstrap_config =
|
||||
g->ops.gsp.falcon_setup_boot_config;
|
||||
break;
|
||||
case FALCON_ID_SEC2:
|
||||
flcn_eng_dep_ops->reset_eng = g->ops.sec2.sec2_reset;
|
||||
flcn_eng_dep_ops->setup_bootstrap_config =
|
||||
g->ops.sec2.flcn_setup_boot_config;
|
||||
flcn_eng_dep_ops->copy_to_emem = g->ops.sec2.sec2_copy_to_emem;
|
||||
flcn_eng_dep_ops->copy_from_emem =
|
||||
g->ops.sec2.sec2_copy_from_emem;
|
||||
break;
|
||||
default:
|
||||
flcn_eng_dep_ops->reset_eng = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void tu104_falcon_sw_init(struct nvgpu_falcon *flcn)
|
||||
{
|
||||
struct gk20a *g = flcn->g;
|
||||
|
||||
switch (flcn->flcn_id) {
|
||||
case FALCON_ID_GSPLITE:
|
||||
flcn->flcn_base = g->ops.gsp.falcon_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = false;
|
||||
break;
|
||||
case FALCON_ID_SEC2:
|
||||
flcn->flcn_base = g->ops.sec2.falcon_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = true;
|
||||
flcn->emem_supported = true;
|
||||
break;
|
||||
case FALCON_ID_MINION:
|
||||
flcn->flcn_base = g->ops.nvlink.minion.base_addr(g);
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = true;
|
||||
break;
|
||||
case FALCON_ID_NVDEC:
|
||||
flcn->flcn_base = g->ops.nvdec.falcon_base_addr();
|
||||
flcn->is_falcon_supported = true;
|
||||
flcn->is_interrupt_enabled = true;
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* set false to inherit falcon support
|
||||
* from previous chips HAL
|
||||
*/
|
||||
flcn->is_falcon_supported = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (flcn->is_falcon_supported) {
|
||||
tu104_falcon_engine_dependency_ops(flcn);
|
||||
} else {
|
||||
/*
|
||||
* Forward call to previous chip's SW init
|
||||
* to fetch info for requested
|
||||
* falcon as no changes between
|
||||
* current & previous chips.
|
||||
*/
|
||||
gk20a_falcon_sw_init(flcn);
|
||||
}
|
||||
}
|
||||
29
drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.h
Normal file
29
drivers/gpu/nvgpu/common/falcon/falcon_sw_tu104.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_FALCON_SW_TU104_H
|
||||
#define NVGPU_FALCON_SW_TU104_H
|
||||
|
||||
void tu104_falcon_engine_dependency_ops(struct nvgpu_falcon *flcn);
|
||||
void tu104_falcon_sw_init(struct nvgpu_falcon *flcn);
|
||||
|
||||
#endif /* NVGPU_FALCON_SW_TU104_H */
|
||||
41
drivers/gpu/nvgpu/common/fb/fb.c
Normal file
41
drivers/gpu/nvgpu/common/fb/fb.c
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/power_features/cg.h>
|
||||
#include <nvgpu/fb.h>
|
||||
|
||||
int nvgpu_init_fb_support(struct gk20a *g)
|
||||
{
|
||||
if (g->ops.mc.fb_reset != NULL) {
|
||||
g->ops.mc.fb_reset(g);
|
||||
}
|
||||
|
||||
nvgpu_cg_slcg_fb_ltc_load_enable(g);
|
||||
|
||||
nvgpu_cg_blcg_fb_ltc_load_enable(g);
|
||||
|
||||
if (g->ops.fb.init_fs_state != NULL) {
|
||||
g->ops.fb.init_fs_state(g);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
130
drivers/gpu/nvgpu/common/fbp/fbp.c
Normal file
130
drivers/gpu/nvgpu/common/fbp/fbp.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/fbp.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include "fbp_priv.h"
|
||||
|
||||
int nvgpu_fbp_init_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fbp *fbp;
|
||||
u32 fbp_en_mask;
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 max_ltc_per_fbp;
|
||||
u32 rop_l2_all_en;
|
||||
unsigned long i;
|
||||
unsigned long fbp_en_mask_tmp;
|
||||
u32 tmp;
|
||||
#endif
|
||||
|
||||
if (g->fbp != NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
fbp = nvgpu_kzalloc(g, sizeof(*fbp));
|
||||
if (fbp == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
fbp->num_fbps = g->ops.priv_ring.get_fbp_count(g);
|
||||
nvgpu_log_info(g, "fbps: %d", fbp->num_fbps);
|
||||
#endif
|
||||
|
||||
fbp->max_fbps_count = g->ops.top.get_max_fbps_count(g);
|
||||
nvgpu_log_info(g, "max_fbps_count: %d", fbp->max_fbps_count);
|
||||
|
||||
/*
|
||||
* Read active fbp mask from fuse
|
||||
* Note that 0:enable and 1:disable in value read from fuse so we've to
|
||||
* flip the bits.
|
||||
* Also set unused bits to zero
|
||||
*/
|
||||
fbp_en_mask = g->ops.fuse.fuse_status_opt_fbp(g);
|
||||
fbp_en_mask = ~fbp_en_mask;
|
||||
fbp_en_mask = fbp_en_mask &
|
||||
nvgpu_safe_sub_u32(BIT32(fbp->max_fbps_count), 1U);
|
||||
fbp->fbp_en_mask = fbp_en_mask;
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
fbp->fbp_rop_l2_en_mask =
|
||||
nvgpu_kzalloc(g,
|
||||
nvgpu_safe_mult_u64(fbp->max_fbps_count, sizeof(u32)));
|
||||
if (fbp->fbp_rop_l2_en_mask == NULL) {
|
||||
nvgpu_kfree(g, fbp);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
fbp_en_mask_tmp = fbp_en_mask;
|
||||
max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g);
|
||||
rop_l2_all_en = nvgpu_safe_sub_u32(BIT32(max_ltc_per_fbp), 1U);
|
||||
|
||||
/* mask of Rop_L2 for each FBP */
|
||||
for_each_set_bit(i, &fbp_en_mask_tmp, fbp->max_fbps_count) {
|
||||
tmp = g->ops.fuse.fuse_status_opt_rop_l2_fbp(g, i);
|
||||
fbp->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
|
||||
}
|
||||
#endif
|
||||
|
||||
g->fbp = fbp;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_fbp_remove_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fbp *fbp = g->fbp;
|
||||
|
||||
if (fbp != NULL) {
|
||||
nvgpu_kfree(g, fbp->fbp_rop_l2_en_mask);
|
||||
nvgpu_kfree(g, fbp);
|
||||
}
|
||||
|
||||
g->fbp = NULL;
|
||||
}
|
||||
|
||||
u32 nvgpu_fbp_get_max_fbps_count(struct nvgpu_fbp *fbp)
|
||||
{
|
||||
return fbp->max_fbps_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_fbp_get_fbp_en_mask(struct nvgpu_fbp *fbp)
|
||||
{
|
||||
return fbp->fbp_en_mask;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 nvgpu_fbp_get_num_fbps(struct nvgpu_fbp *fbp)
|
||||
{
|
||||
return fbp->num_fbps;
|
||||
}
|
||||
|
||||
u32 *nvgpu_fbp_get_rop_l2_en_mask(struct nvgpu_fbp *fbp)
|
||||
{
|
||||
return fbp->fbp_rop_l2_en_mask;
|
||||
}
|
||||
#endif
|
||||
|
||||
33
drivers/gpu/nvgpu/common/fbp/fbp_priv.h
Normal file
33
drivers/gpu/nvgpu/common/fbp/fbp_priv.h
Normal file
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_FBP_PRIV_H
|
||||
#define NVGPU_FBP_PRIV_H
|
||||
|
||||
struct nvgpu_fbp {
|
||||
u32 num_fbps;
|
||||
u32 max_fbps_count;
|
||||
u32 fbp_en_mask;
|
||||
u32 *fbp_rop_l2_en_mask;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_FBP_PRIV_H */
|
||||
120
drivers/gpu/nvgpu/common/fence/fence.c
Normal file
120
drivers/gpu/nvgpu/common/fence/fence.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/soc.h>
|
||||
#include <nvgpu/os_fence.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/fence.h>
|
||||
#include <nvgpu/user_fence.h>
|
||||
#include "fence_priv.h"
|
||||
|
||||
static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref)
|
||||
{
|
||||
return (struct nvgpu_fence_type *)((uintptr_t)ref -
|
||||
offsetof(struct nvgpu_fence_type, priv.ref));
|
||||
}
|
||||
|
||||
static void nvgpu_fence_release(struct nvgpu_ref *ref)
|
||||
{
|
||||
struct nvgpu_fence_type *f = nvgpu_fence_from_ref(ref);
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
if (nvgpu_os_fence_is_initialized(&pf->os_fence)) {
|
||||
pf->os_fence.ops->drop_ref(&pf->os_fence);
|
||||
}
|
||||
|
||||
pf->ops->release(f);
|
||||
}
|
||||
|
||||
void nvgpu_fence_put(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
nvgpu_ref_put(&pf->ref, nvgpu_fence_release);
|
||||
}
|
||||
|
||||
struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
nvgpu_ref_get(&pf->ref);
|
||||
return f;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract an object to be passed to the userspace as a result of a submitted
|
||||
* job. This must be balanced with a call to nvgpu_user_fence_release().
|
||||
*/
|
||||
struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
struct nvgpu_user_fence uf = (struct nvgpu_user_fence) {
|
||||
#ifdef CONFIG_TEGRA_GK20A_NVHOST
|
||||
.syncpt_id = pf->syncpt_id,
|
||||
.syncpt_value = pf->syncpt_value,
|
||||
#endif
|
||||
.os_fence = pf->os_fence,
|
||||
};
|
||||
|
||||
/*
|
||||
* The os fence member has to live so it can be signaled when the job
|
||||
* completes. The returned user fence may live longer than that before
|
||||
* being safely attached to an fd if the job completes before a
|
||||
* submission ioctl finishes, or if it's stored for cde job state
|
||||
* tracking.
|
||||
*/
|
||||
if (nvgpu_os_fence_is_initialized(&pf->os_fence)) {
|
||||
pf->os_fence.ops->dup(&pf->os_fence);
|
||||
}
|
||||
|
||||
return uf;
|
||||
}
|
||||
|
||||
int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f,
|
||||
u32 timeout)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
if (!nvgpu_platform_is_silicon(g)) {
|
||||
timeout = U32_MAX;
|
||||
}
|
||||
return pf->ops->wait(f, timeout);
|
||||
}
|
||||
|
||||
bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
return pf->ops->is_expired(f);
|
||||
}
|
||||
|
||||
void nvgpu_fence_init(struct nvgpu_fence_type *f,
|
||||
const struct nvgpu_fence_ops *ops,
|
||||
struct nvgpu_os_fence os_fence)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
nvgpu_ref_init(&pf->ref);
|
||||
pf->ops = ops;
|
||||
pf->os_fence = os_fence;
|
||||
}
|
||||
39
drivers/gpu/nvgpu/common/fence/fence_priv.h
Normal file
39
drivers/gpu/nvgpu/common/fence/fence_priv.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef NVGPU_FENCE_PRIV_H
|
||||
#define NVGPU_FENCE_PRIV_H
|
||||
|
||||
#include <nvgpu/os_fence.h>
|
||||
|
||||
struct nvgpu_fence_type;
|
||||
|
||||
struct nvgpu_fence_ops {
|
||||
int (*wait)(struct nvgpu_fence_type *f, u32 timeout);
|
||||
bool (*is_expired)(struct nvgpu_fence_type *f);
|
||||
void (*release)(struct nvgpu_fence_type *f);
|
||||
};
|
||||
|
||||
void nvgpu_fence_init(struct nvgpu_fence_type *f,
|
||||
const struct nvgpu_fence_ops *ops,
|
||||
struct nvgpu_os_fence os_fence);
|
||||
|
||||
#endif
|
||||
78
drivers/gpu/nvgpu/common/fence/fence_sema.c
Normal file
78
drivers/gpu/nvgpu/common/fence/fence_sema.c
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/semaphore.h>
|
||||
#include <nvgpu/cond.h>
|
||||
#include <nvgpu/fence.h>
|
||||
#include <nvgpu/fence_sema.h>
|
||||
#include "fence_priv.h"
|
||||
|
||||
static int nvgpu_fence_semaphore_wait(struct nvgpu_fence_type *f, u32 timeout)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
if (!nvgpu_semaphore_is_acquired(pf->semaphore)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return NVGPU_COND_WAIT_INTERRUPTIBLE(
|
||||
pf->semaphore_wq,
|
||||
!nvgpu_semaphore_is_acquired(pf->semaphore),
|
||||
timeout);
|
||||
}
|
||||
|
||||
static bool nvgpu_fence_semaphore_is_expired(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
return !nvgpu_semaphore_is_acquired(pf->semaphore);
|
||||
}
|
||||
|
||||
static void nvgpu_fence_semaphore_release(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
if (pf->semaphore != NULL) {
|
||||
nvgpu_semaphore_put(pf->semaphore);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct nvgpu_fence_ops nvgpu_fence_semaphore_ops = {
|
||||
.wait = nvgpu_fence_semaphore_wait,
|
||||
.is_expired = nvgpu_fence_semaphore_is_expired,
|
||||
.release = nvgpu_fence_semaphore_release,
|
||||
};
|
||||
|
||||
/* This function takes ownership of the semaphore as well as the os_fence */
|
||||
void nvgpu_fence_from_semaphore(
|
||||
struct nvgpu_fence_type *f,
|
||||
struct nvgpu_semaphore *semaphore,
|
||||
struct nvgpu_cond *semaphore_wq,
|
||||
struct nvgpu_os_fence os_fence)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
nvgpu_fence_init(f, &nvgpu_fence_semaphore_ops, os_fence);
|
||||
|
||||
pf->semaphore = semaphore;
|
||||
pf->semaphore_wq = semaphore_wq;
|
||||
}
|
||||
89
drivers/gpu/nvgpu/common/fence/fence_syncpt.c
Normal file
89
drivers/gpu/nvgpu/common/fence/fence_syncpt.c
Normal file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/nvhost.h>
|
||||
#include <nvgpu/fence.h>
|
||||
#include <nvgpu/fence_syncpt.h>
|
||||
#include "fence_priv.h"
|
||||
|
||||
static int nvgpu_fence_syncpt_wait(struct nvgpu_fence_type *f, u32 timeout)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
return nvgpu_nvhost_syncpt_wait_timeout_ext(
|
||||
pf->nvhost_device, pf->syncpt_id, pf->syncpt_value,
|
||||
timeout, NVGPU_NVHOST_DEFAULT_WAITER);
|
||||
}
|
||||
|
||||
static bool nvgpu_fence_syncpt_is_expired(struct nvgpu_fence_type *f)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
/*
|
||||
* In cases we don't register a notifier, we can't expect the
|
||||
* syncpt value to be updated. For this case, we force a read
|
||||
* of the value from HW, and then check for expiration.
|
||||
*/
|
||||
if (!nvgpu_nvhost_syncpt_is_expired_ext(pf->nvhost_device,
|
||||
pf->syncpt_id, pf->syncpt_value)) {
|
||||
int err;
|
||||
u32 val;
|
||||
|
||||
err = nvgpu_nvhost_syncpt_read_ext_check(pf->nvhost_device,
|
||||
pf->syncpt_id, &val);
|
||||
WARN(err != 0, "syncpt read failed??");
|
||||
if (err == 0) {
|
||||
return nvgpu_nvhost_syncpt_is_expired_ext(
|
||||
pf->nvhost_device,
|
||||
pf->syncpt_id, pf->syncpt_value);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nvgpu_fence_syncpt_release(struct nvgpu_fence_type *f)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct nvgpu_fence_ops nvgpu_fence_syncpt_ops = {
|
||||
.wait = nvgpu_fence_syncpt_wait,
|
||||
.is_expired = nvgpu_fence_syncpt_is_expired,
|
||||
.release = nvgpu_fence_syncpt_release,
|
||||
};
|
||||
|
||||
/* This function takes the ownership of the os_fence */
|
||||
void nvgpu_fence_from_syncpt(
|
||||
struct nvgpu_fence_type *f,
|
||||
struct nvgpu_nvhost_dev *nvhost_device,
|
||||
u32 id, u32 value, struct nvgpu_os_fence os_fence)
|
||||
{
|
||||
struct nvgpu_fence_type_priv *pf = &f->priv;
|
||||
|
||||
nvgpu_fence_init(f, &nvgpu_fence_syncpt_ops, os_fence);
|
||||
|
||||
pf->nvhost_device = nvhost_device;
|
||||
pf->syncpt_id = id;
|
||||
pf->syncpt_value = value;
|
||||
}
|
||||
2304
drivers/gpu/nvgpu/common/fifo/channel.c
Normal file
2304
drivers/gpu/nvgpu/common/fifo/channel.c
Normal file
File diff suppressed because it is too large
Load Diff
199
drivers/gpu/nvgpu/common/fifo/channel_wdt.c
Normal file
199
drivers/gpu/nvgpu/common/fifo/channel_wdt.c
Normal file
@@ -0,0 +1,199 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "channel_wdt.h"
|
||||
#include "channel_worker.h"
|
||||
|
||||
#include <nvgpu/watchdog.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/error_notifier.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
|
||||
void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
|
||||
{
|
||||
ch->wdt_debug_dump = dump;
|
||||
}
|
||||
|
||||
static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
|
||||
struct nvgpu_channel *ch)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
struct nvgpu_channel_wdt_state state = { 0, 0 };
|
||||
|
||||
/*
|
||||
* Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
|
||||
* the moment because system suspend puts g->regs away but doesn't stop
|
||||
* the worker thread that runs the watchdog. This might need to be
|
||||
* cleared up in the future.
|
||||
*/
|
||||
if (nvgpu_channel_wdt_running(ch->wdt)) {
|
||||
/*
|
||||
* Read the state only if the wdt is on to avoid unnecessary
|
||||
* accesses. The kernel mem for userd may not even exist; this
|
||||
* channel could be in usermode submit mode.
|
||||
*/
|
||||
state.gp_get = g->ops.userd.gp_get(g, ch);
|
||||
state.pb_get = g->ops.userd.pb_get(g, ch);
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
|
||||
|
||||
/*
|
||||
* FIXME: channel recovery can race the submit path and can start even
|
||||
* after this, but this check is the best we can do for now.
|
||||
*/
|
||||
if (!nvgpu_channel_check_unserviceable(ch)) {
|
||||
nvgpu_channel_wdt_start(ch->wdt, &state);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_channel_restart_all_wdts(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 chid;
|
||||
|
||||
for (chid = 0; chid < f->num_channels; chid++) {
|
||||
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
|
||||
|
||||
if (ch != NULL) {
|
||||
if ((ch->wdt != NULL) &&
|
||||
!nvgpu_channel_check_unserviceable(ch)) {
|
||||
struct nvgpu_channel_wdt_state state =
|
||||
nvgpu_channel_collect_wdt_state(ch);
|
||||
|
||||
nvgpu_channel_wdt_rewind(ch->wdt, &state);
|
||||
}
|
||||
nvgpu_channel_put(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (nvgpu_channel_check_unserviceable(ch)) {
|
||||
/* channel is already recovered */
|
||||
nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_err(g, "Job on channel %d timed out", ch->chid);
|
||||
|
||||
/* force reset calls gk20a_debug_dump but not this */
|
||||
if (ch->wdt_debug_dump) {
|
||||
gk20a_gr_debug_dump(g);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
|
||||
if (g->ops.tsg.force_reset(ch,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
|
||||
ch->wdt_debug_dump) != 0) {
|
||||
nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Test the watchdog progress. If the channel is stuck, reset it.
|
||||
*
|
||||
* The gpu is implicitly on at this point because the watchdog can only run on
|
||||
* channels that have submitted jobs pending for cleanup.
|
||||
*/
|
||||
static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
|
||||
|
||||
if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
|
||||
nvgpu_channel_recover_from_wdt(ch);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
|
||||
{
|
||||
struct nvgpu_channel_worker *ch_worker =
|
||||
nvgpu_channel_worker_from_worker(worker);
|
||||
int ret;
|
||||
|
||||
ch_worker->watchdog_interval = 100U;
|
||||
|
||||
ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout,
|
||||
ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(worker->g, "timeout_init failed: %d", ret);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop every living channel, check timeouts and handle stuck channels.
|
||||
*/
|
||||
static void nvgpu_channel_poll_wdt(struct gk20a *g)
|
||||
{
|
||||
unsigned int chid;
|
||||
|
||||
for (chid = 0; chid < g->fifo.num_channels; chid++) {
|
||||
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
|
||||
|
||||
if (ch != NULL) {
|
||||
if (!nvgpu_channel_check_unserviceable(ch)) {
|
||||
nvgpu_channel_check_wdt(ch);
|
||||
}
|
||||
nvgpu_channel_put(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_channel_worker_poll_wakeup_post_process_item(
|
||||
struct nvgpu_worker *worker)
|
||||
{
|
||||
struct gk20a *g = worker->g;
|
||||
|
||||
struct nvgpu_channel_worker *ch_worker =
|
||||
nvgpu_channel_worker_from_worker(worker);
|
||||
int ret;
|
||||
|
||||
if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
|
||||
nvgpu_channel_poll_wdt(g);
|
||||
ret = nvgpu_timeout_init(g, &ch_worker->timeout,
|
||||
ch_worker->watchdog_interval,
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "timeout_init failed: %d", ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
|
||||
struct nvgpu_worker *worker)
|
||||
{
|
||||
struct nvgpu_channel_worker *ch_worker =
|
||||
nvgpu_channel_worker_from_worker(worker);
|
||||
|
||||
return ch_worker->watchdog_interval;
|
||||
}
|
||||
42
drivers/gpu/nvgpu/common/fifo/channel_wdt.h
Normal file
42
drivers/gpu/nvgpu/common/fifo/channel_wdt.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WDT_H
|
||||
#define NVGPU_COMMON_FIFO_CHANNEL_WDT_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct nvgpu_channel;
|
||||
|
||||
#ifdef CONFIG_NVGPU_CHANNEL_WDT
|
||||
struct nvgpu_worker;
|
||||
|
||||
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch);
|
||||
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker);
|
||||
void nvgpu_channel_worker_poll_wakeup_post_process_item(
|
||||
struct nvgpu_worker *worker);
|
||||
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
|
||||
struct nvgpu_worker *worker);
|
||||
#else
|
||||
static inline void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
|
||||
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
|
||||
|
||||
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WDT_H */
|
||||
118
drivers/gpu/nvgpu/common/fifo/channel_worker.c
Normal file
118
drivers/gpu/nvgpu/common/fifo/channel_worker.c
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "channel_worker.h"
|
||||
#include "channel_wdt.h"
|
||||
|
||||
#include <nvgpu/worker.h>
|
||||
#include <nvgpu/channel.h>
|
||||
|
||||
static inline struct nvgpu_channel *
|
||||
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_channel *)
|
||||
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
|
||||
};
|
||||
|
||||
static void nvgpu_channel_worker_poll_wakeup_process_item(
|
||||
struct nvgpu_list_node *work_item)
|
||||
{
|
||||
struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item);
|
||||
|
||||
nvgpu_assert(ch != NULL);
|
||||
|
||||
nvgpu_log_fn(ch->g, " ");
|
||||
|
||||
nvgpu_channel_clean_up_jobs(ch);
|
||||
|
||||
/* ref taken when enqueued */
|
||||
nvgpu_channel_put(ch);
|
||||
}
|
||||
|
||||
static const struct nvgpu_worker_ops channel_worker_ops = {
|
||||
#ifdef CONFIG_NVGPU_CHANNEL_WDT
|
||||
.pre_process = nvgpu_channel_worker_poll_init,
|
||||
.wakeup_post_process =
|
||||
nvgpu_channel_worker_poll_wakeup_post_process_item,
|
||||
.wakeup_timeout =
|
||||
nvgpu_channel_worker_poll_wakeup_condition_get_timeout,
|
||||
#endif
|
||||
.wakeup_early_exit = NULL,
|
||||
.wakeup_process_item =
|
||||
nvgpu_channel_worker_poll_wakeup_process_item,
|
||||
.wakeup_condition = NULL,
|
||||
};
|
||||
|
||||
/**
|
||||
* Initialize the channel worker's metadata and start the background thread.
|
||||
*/
|
||||
int nvgpu_channel_worker_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_worker *worker = &g->channel_worker.worker;
|
||||
|
||||
nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name);
|
||||
|
||||
return nvgpu_worker_init(g, worker, &channel_worker_ops);
|
||||
}
|
||||
|
||||
void nvgpu_channel_worker_deinit(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_worker *worker = &g->channel_worker.worker;
|
||||
|
||||
nvgpu_worker_deinit(worker);
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a channel to the worker's list, if not there already.
|
||||
*
|
||||
* The worker thread processes work items (channels in its work list) and polls
|
||||
* for other things. This adds @ch to the end of the list and wakes the worker
|
||||
* up immediately. If the channel already existed in the list, it's not added,
|
||||
* because in that case it has been scheduled already but has not yet been
|
||||
* processed.
|
||||
*/
|
||||
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
int ret;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
/*
|
||||
* Ref released when this item gets processed. The caller should hold
|
||||
* one ref already, so normally shouldn't fail, but the channel could
|
||||
* end up being freed between the time the caller got its reference and
|
||||
* the time we end up here (e.g., if the client got killed); if so, just
|
||||
* return.
|
||||
*/
|
||||
if (nvgpu_channel_get(ch) == NULL) {
|
||||
nvgpu_info(g, "cannot get ch ref for worker!");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = nvgpu_worker_enqueue(&g->channel_worker.worker,
|
||||
&ch->worker_item);
|
||||
if (ret != 0) {
|
||||
nvgpu_channel_put(ch);
|
||||
return;
|
||||
}
|
||||
}
|
||||
37
drivers/gpu/nvgpu/common/fifo/channel_worker.h
Normal file
37
drivers/gpu/nvgpu/common/fifo/channel_worker.h
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
|
||||
#define NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
|
||||
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch);
|
||||
|
||||
static inline struct nvgpu_channel_worker *
|
||||
nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
|
||||
{
|
||||
return (struct nvgpu_channel_worker *)
|
||||
((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker));
|
||||
};
|
||||
|
||||
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WORKER_H */
|
||||
88
drivers/gpu/nvgpu/common/fifo/engine_status.c
Normal file
88
drivers/gpu/nvgpu/common/fifo/engine_status.c
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/engine_status.h>
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw_switch(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH;
|
||||
}
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw_load(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_LOAD;
|
||||
}
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw_save(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SAVE;
|
||||
}
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return (nvgpu_engine_status_is_ctxsw_switch(engine_status) ||
|
||||
nvgpu_engine_status_is_ctxsw_load(engine_status) ||
|
||||
nvgpu_engine_status_is_ctxsw_save(engine_status));
|
||||
}
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw_invalid(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_INVALID;
|
||||
}
|
||||
|
||||
bool nvgpu_engine_status_is_ctxsw_valid(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID;
|
||||
}
|
||||
bool nvgpu_engine_status_is_ctx_type_tsg(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctx_id_type == ENGINE_STATUS_CTX_ID_TYPE_TSGID;
|
||||
}
|
||||
bool nvgpu_engine_status_is_next_ctx_type_tsg(struct nvgpu_engine_status_info
|
||||
*engine_status)
|
||||
{
|
||||
return engine_status->ctx_next_id_type ==
|
||||
ENGINE_STATUS_CTX_NEXT_ID_TYPE_TSGID;
|
||||
}
|
||||
|
||||
void nvgpu_engine_status_get_ctx_id_type(struct nvgpu_engine_status_info
|
||||
*engine_status, u32 *ctx_id, u32 *ctx_type)
|
||||
{
|
||||
*ctx_id = engine_status->ctx_id;
|
||||
*ctx_type = engine_status->ctx_id_type;
|
||||
}
|
||||
|
||||
void nvgpu_engine_status_get_next_ctx_id_type(struct nvgpu_engine_status_info
|
||||
*engine_status, u32 *ctx_next_id,
|
||||
u32 *ctx_next_type)
|
||||
{
|
||||
*ctx_next_id = engine_status->ctx_next_id;
|
||||
*ctx_next_type = engine_status->ctx_next_id_type;
|
||||
}
|
||||
960
drivers/gpu/nvgpu/common/fifo/engines.c
Normal file
960
drivers/gpu/nvgpu/common/fifo/engines.c
Normal file
@@ -0,0 +1,960 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/errno.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/bitops.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu.h>
|
||||
#include <nvgpu/pmu/mutex.h>
|
||||
#endif
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/engine_status.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/pbdma_status.h>
|
||||
#include <nvgpu/power_features/pg.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/soc.h>
|
||||
#include <nvgpu/device.h>
|
||||
#include <nvgpu/gr/gr_falcon.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#include <nvgpu/swprofile.h>
|
||||
|
||||
#include <nvgpu/fifo/swprofile.h>
|
||||
|
||||
#define FECS_METHOD_WFI_RESTORE 0x80000U
|
||||
|
||||
enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g,
|
||||
const struct nvgpu_device *dev)
|
||||
{
|
||||
enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL;
|
||||
|
||||
if (nvgpu_device_is_graphics(g, dev)) {
|
||||
ret = NVGPU_ENGINE_GR;
|
||||
} else if (nvgpu_device_is_ce(g, dev)) {
|
||||
/* For now, all CE engines have separate runlists. We can
|
||||
* identify the NVGPU_ENGINE_GRCE type CE using runlist_id
|
||||
		 * comparison logic with GR runlist_id in init_info()
|
||||
*/
|
||||
ret = NVGPU_ENGINE_ASYNC_CE;
|
||||
} else {
|
||||
ret = NVGPU_ENGINE_INVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
|
||||
struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
if (engine_id >= f->max_engines) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return f->host_engines[engine_id];
|
||||
}
|
||||
|
||||
bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
if (engine_id >= f->max_engines) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return f->host_engines[engine_id] != NULL;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
|
||||
if (dev == NULL) {
|
||||
nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
|
||||
inst_id);
|
||||
return NVGPU_INVALID_ENG_ID;
|
||||
}
|
||||
|
||||
return dev->engine_id;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
|
||||
{
|
||||
/* Consider 1st available GR engine */
|
||||
return nvgpu_engine_get_gr_id_for_inst(g, 0U);
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
const struct nvgpu_device *dev = NULL;
|
||||
|
||||
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
|
||||
if (dev == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return BIT32(dev->intr_id);
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
u32 intr_mask = 0U;
|
||||
u32 i;
|
||||
|
||||
for (i = 0U; i < g->num_gr_instances; i++) {
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS,
|
||||
nvgpu_gr_get_syspipe_id(g, i));
|
||||
if (dev == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
intr_mask |= BIT32(dev->intr_id);
|
||||
}
|
||||
|
||||
return intr_mask;
|
||||
}
|
||||
|
||||
u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
u32 i;
|
||||
u32 mask = 0U;
|
||||
|
||||
/*
|
||||
* For old chips - pre-Pascal - we have COPY[0-2], for new chips we
|
||||
* have some number of LCE instances. For the purpose of this code we
|
||||
* imagine a system that could have both; in reality that'll never be
|
||||
* the case.
|
||||
*
|
||||
* This can be cleaned up in the future by defining a SW type for CE and
|
||||
* hiding this ugliness in the device management code.
|
||||
*/
|
||||
for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
|
||||
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
|
||||
if (dev == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mask |= BIT32(dev->intr_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now take care of LCEs.
|
||||
*/
|
||||
for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
|
||||
nvgpu_assert(dev != NULL);
|
||||
|
||||
mask |= BIT32(dev->intr_id);
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
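
/*
 * Usage sketch (assumed caller, not defined in this file): the full host
 * engine interrupt mask can be built by OR-ing the two helpers above, e.g.
 *
 *   u32 eng_intr_mask = nvgpu_gr_engine_interrupt_mask(g) |
 *                       nvgpu_ce_engine_interrupt_mask(g);
 */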
|
||||
|
||||
#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
|
||||
|
||||
static void nvgpu_engine_enable_activity(struct gk20a *g,
|
||||
const struct nvgpu_device *dev)
|
||||
{
|
||||
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED);
|
||||
}
|
||||
|
||||
void nvgpu_engine_enable_activity_all(struct gk20a *g)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i < g->fifo.num_engines; i++) {
|
||||
nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_engine_disable_activity(struct gk20a *g,
|
||||
const struct nvgpu_device *dev,
|
||||
bool wait_for_idle)
|
||||
{
|
||||
u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID;
|
||||
u32 engine_chid = NVGPU_INVALID_CHANNEL_ID;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = -EINVAL;
|
||||
#endif
|
||||
int err = 0;
|
||||
struct nvgpu_channel *ch = NULL;
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
struct nvgpu_pbdma_status_info pbdma_status;
|
||||
unsigned long runlist_served_pbdmas;
|
||||
unsigned long bit;
|
||||
u32 pbdma_id;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
|
||||
&engine_status);
|
||||
if (engine_status.is_busy && !wait_for_idle) {
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (g->ops.pmu.is_pmu_supported(g)) {
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id),
|
||||
RUNLIST_DISABLED);
|
||||
|
||||
runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask;
|
||||
|
||||
for_each_set_bit(bit, &runlist_served_pbdmas,
|
||||
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
|
||||
pbdma_id = U32(bit);
|
||||
/* chid from pbdma status */
|
||||
g->ops.pbdma_status.read_pbdma_status_info(g,
|
||||
pbdma_id,
|
||||
&pbdma_status);
|
||||
if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) ||
|
||||
nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) {
|
||||
pbdma_chid = pbdma_status.id;
|
||||
} else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) ||
|
||||
nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) {
|
||||
pbdma_chid = pbdma_status.next_id;
|
||||
} else {
|
||||
/* Nothing to do here */
|
||||
}
|
||||
|
||||
if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) {
|
||||
ch = nvgpu_channel_from_id(g, pbdma_chid);
|
||||
if (ch != NULL) {
|
||||
err = g->ops.fifo.preempt_channel(g, ch);
|
||||
nvgpu_channel_put(ch);
|
||||
}
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* chid from engine status */
|
||||
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
|
||||
&engine_status);
|
||||
if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) ||
|
||||
nvgpu_engine_status_is_ctxsw_save(&engine_status)) {
|
||||
engine_chid = engine_status.ctx_id;
|
||||
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) ||
|
||||
nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
|
||||
engine_chid = engine_status.ctx_next_id;
|
||||
} else {
|
||||
/* Nothing to do here */
|
||||
}
|
||||
|
||||
	if (engine_chid != NVGPU_INVALID_CHANNEL_ID && engine_chid != pbdma_chid) {
|
||||
ch = nvgpu_channel_from_id(g, engine_chid);
|
||||
if (ch != NULL) {
|
||||
err = g->ops.fifo.preempt_channel(g, ch);
|
||||
nvgpu_channel_put(ch);
|
||||
}
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
|
||||
clean_up:
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0){
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (err != 0) {
|
||||
nvgpu_log_fn(g, "failed");
|
||||
nvgpu_engine_enable_activity(g, dev);
|
||||
} else {
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
return err;
|
||||
}
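
/*
 * Usage sketch (assumed caller, illustrative only): pair a successful disable
 * with a re-enable once the exclusive work is done. On failure the function
 * has already re-enabled activity on the engine itself.
 *
 *   if (nvgpu_engine_disable_activity(g, dev, true) == 0) {
 *           ... exclusive work on the engine ...
 *           nvgpu_engine_enable_activity_all(g);
 *   }
 */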
|
||||
|
||||
int nvgpu_engine_disable_activity_all(struct gk20a *g,
|
||||
bool wait_for_idle)
|
||||
{
|
||||
unsigned int i;
|
||||
int err = 0, ret = 0;
|
||||
|
||||
for (i = 0; i < g->fifo.num_engines; i++) {
|
||||
err = nvgpu_engine_disable_activity(g,
|
||||
g->fifo.active_engines[i],
|
||||
wait_for_idle);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to disable engine %d activity",
|
||||
g->fifo.active_engines[i]->engine_id);
|
||||
ret = err;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
while (i-- != 0U) {
|
||||
nvgpu_engine_enable_activity(g,
|
||||
g->fifo.active_engines[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_engine_wait_for_idle(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_timeout timeout;
|
||||
u32 delay = POLL_DELAY_MIN_US;
|
||||
int ret = 0, err = 0;
|
||||
u32 i, host_num_engines;
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
host_num_engines =
|
||||
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
|
||||
|
||||
err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g),
|
||||
NVGPU_TIMER_CPU_TIMER);
|
||||
if (err != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i < host_num_engines; i++) {
|
||||
if (!nvgpu_engine_check_valid_id(g, i)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = -ETIMEDOUT;
|
||||
do {
|
||||
g->ops.engine_status.read_engine_status_info(g, i,
|
||||
&engine_status);
|
||||
if (!engine_status.is_busy) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
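			/*
			 * Exponential backoff: sleep between delay and 2*delay
			 * microseconds, then double delay (capped at
			 * POLL_DELAY_MAX_US) before polling again.
			 */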
nvgpu_usleep_range(delay, delay * 2U);
|
||||
delay = min_t(u32,
|
||||
delay << 1U, POLL_DELAY_MAX_US);
|
||||
} while (nvgpu_timeout_expired(&timeout) == 0);
|
||||
|
||||
if (ret != 0) {
|
||||
/* possible causes:
|
||||
* check register settings programmed in hal set by
|
||||
* elcg_init_idle_filters and init_therm_setup_hw
|
||||
*/
|
||||
nvgpu_err(g, "cannot idle engine: %u "
|
||||
"engine_status: 0x%08x", i,
|
||||
engine_status.reg_data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */
|
||||
|
||||
int nvgpu_engine_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int err = 0;
|
||||
size_t size;
|
||||
|
||||
f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
|
||||
size = nvgpu_safe_mult_u64(f->max_engines,
|
||||
sizeof(struct nvgpu_device *));
|
||||
|
||||
/*
|
||||
* Allocate the two device lists for host devices.
|
||||
*/
|
||||
f->host_engines = nvgpu_kzalloc(g, size);
|
||||
if (f->host_engines == NULL) {
|
||||
nvgpu_err(g, "OOM allocating host engine list");
|
||||
return -ENOMEM;
|
||||
}
|
||||
f->active_engines = nvgpu_kzalloc(g, size);
|
||||
if (f->active_engines == NULL) {
|
||||
nvgpu_err(g, "no mem for active engine list");
|
||||
err = -ENOMEM;
|
||||
goto clean_up_engine_info;
|
||||
}
|
||||
|
||||
err = nvgpu_engine_init_info(f);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "init engine info failed");
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_kfree(g, f->active_engines);
|
||||
f->active_engines = NULL;
|
||||
|
||||
clean_up_engine_info:
|
||||
nvgpu_kfree(g, f->host_engines);
|
||||
f->host_engines = NULL;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_engine_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
f->num_engines = 0;
|
||||
nvgpu_kfree(g, f->host_engines);
|
||||
f->host_engines = NULL;
|
||||
nvgpu_kfree(g, f->active_engines);
|
||||
f->active_engines = NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_ENGINE_RESET
|
||||
static void nvgpu_engine_gr_reset(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE);
|
||||
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
	if (nvgpu_pg_elpg_disable(g) != 0) {
		nvgpu_err(g, "failed to disable elpg");
|
||||
}
|
||||
#endif
|
||||
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE);
|
||||
|
||||
#ifdef CONFIG_NVGPU_FECS_TRACE
|
||||
/*
|
||||
* Resetting engine will alter read/write index. Need to flush
|
||||
* circular buffer before re-enabling FECS.
|
||||
*/
|
||||
if (g->ops.gr.fecs_trace.reset != NULL) {
|
||||
if (g->ops.gr.fecs_trace.reset(g) != 0) {
|
||||
nvgpu_warn(g, "failed to reset fecs traces");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET);
|
||||
|
||||
/*
|
||||
	 * The HALT_PIPELINE method and GR reset during recovery are supported
	 * starting with the nvgpu-next simulation.
|
||||
*/
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to halt gr pipe");
|
||||
}
|
||||
|
||||
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE);
|
||||
|
||||
/*
|
||||
	 * Resetting only the engine is not enough; we do the full
	 * init sequence.
|
||||
*/
|
||||
nvgpu_log(g, gpu_dbg_rec, "resetting gr engine");
|
||||
|
||||
err = nvgpu_gr_reset(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to reset gr engine");
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
if (nvgpu_pg_elpg_enable(g) != 0) {
|
||||
nvgpu_err(g, "failed to set enable elpg");
|
||||
}
|
||||
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE);
|
||||
#endif
|
||||
}
|
||||
|
||||
void nvgpu_engine_reset(struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
|
||||
const struct nvgpu_device *dev;
|
||||
int err = 0;
|
||||
u32 gr_instance_id;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (g == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
nvgpu_swprofile_begin_sample(prof);
|
||||
|
||||
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
|
||||
if (dev == NULL) {
|
||||
nvgpu_err(g, "unsupported engine_id %d", engine_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!nvgpu_device_is_ce(g, dev) &&
|
||||
!nvgpu_device_is_graphics(g, dev)) {
|
||||
nvgpu_warn(g, "Ignoring reset for non-host engine.");
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple case first: reset a copy engine.
|
||||
*/
|
||||
if (nvgpu_device_is_ce(g, dev)) {
|
||||
err = nvgpu_mc_reset_dev(g, dev);
|
||||
if (err != 0) {
|
||||
nvgpu_log_info(g, "CE engine [id:%u] reset failed",
|
||||
dev->engine_id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now reset a GR engine.
|
||||
*/
|
||||
gr_instance_id =
|
||||
nvgpu_grmgr_get_gr_instance_id_for_syspipe(
|
||||
g, dev->inst_id);
|
||||
|
||||
nvgpu_gr_exec_for_instance(g,
|
||||
gr_instance_id, nvgpu_engine_gr_reset(g));
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
u32 nr_lces;
|
||||
u32 i;
|
||||
|
||||
/*
|
||||
* Obtain a runlist ID for the fastest available CE. The priority order
|
||||
* is:
|
||||
*
|
||||
* 1. Last available LCE
|
||||
* 2. Last available COPY[0-2]
|
||||
* 3. GRAPHICS runlist as a last resort.
|
||||
*/
|
||||
nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE);
|
||||
if (nr_lces > 0U) {
|
||||
dev = nvgpu_device_get(g,
|
||||
NVGPU_DEVTYPE_LCE,
|
||||
nr_lces - 1U);
|
||||
nvgpu_assert(dev != NULL);
|
||||
|
||||
return dev->runlist_id;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: this only works since NVGPU_DEVTYPE_GRAPHICS is 0 and the COPYx
|
||||
* are all > 0.
|
||||
*/
|
||||
for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) {
|
||||
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
|
||||
if (dev != NULL) {
|
||||
return dev->runlist_id;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Fall back to GR.
|
||||
*/
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
|
||||
nvgpu_assert(dev != NULL);
|
||||
|
||||
return dev->runlist_id;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
|
||||
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
|
||||
if (dev == NULL) {
|
||||
nvgpu_warn(g, "No GR device on this GPU?!");
|
||||
return NVGPU_INVALID_RUNLIST_ID;
|
||||
}
|
||||
|
||||
return dev->runlist_id;
|
||||
}
|
||||
|
||||
bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
|
||||
{
|
||||
u32 i;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
for (i = 0U; i < f->num_engines; i++) {
|
||||
const struct nvgpu_device *dev = f->active_engines[i];
|
||||
|
||||
if (dev->runlist_id == runlist_id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Link engine IDs to MMU IDs and vice versa.
|
||||
*/
|
||||
u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
|
||||
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
|
||||
|
||||
if (dev == NULL) {
|
||||
nvgpu_err(g,
|
||||
"engine_id: %u is not in active list",
|
||||
engine_id);
|
||||
return NVGPU_INVALID_ENG_ID;
|
||||
}
|
||||
|
||||
return dev->fault_id;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id)
|
||||
{
|
||||
u32 i;
|
||||
const struct nvgpu_device *dev;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
for (i = 0U; i < f->num_engines; i++) {
|
||||
dev = f->active_engines[i];
|
||||
|
||||
if (dev->fault_id == fault_id) {
|
||||
return dev->engine_id;
|
||||
}
|
||||
}
|
||||
|
||||
return NVGPU_INVALID_ENG_ID;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg)
|
||||
{
|
||||
unsigned int i;
|
||||
u32 engines = 0;
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
u32 ctx_id;
|
||||
u32 type;
|
||||
bool busy;
|
||||
|
||||
for (i = 0; i < g->fifo.num_engines; i++) {
|
||||
const struct nvgpu_device *dev = g->fifo.active_engines[i];
|
||||
|
||||
g->ops.engine_status.read_engine_status_info(g,
|
||||
dev->engine_id, &engine_status);
|
||||
|
||||
if (nvgpu_engine_status_is_ctxsw_load(
|
||||
&engine_status)) {
|
||||
nvgpu_engine_status_get_next_ctx_id_type(
|
||||
&engine_status, &ctx_id, &type);
|
||||
} else {
|
||||
nvgpu_engine_status_get_ctx_id_type(
|
||||
&engine_status, &ctx_id, &type);
|
||||
}
|
||||
|
||||
busy = engine_status.is_busy;
|
||||
|
||||
if (!busy || !(ctx_id == id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) ||
|
||||
(!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) {
|
||||
engines |= BIT32(dev->engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
return engines;
|
||||
}
|
||||
|
||||
static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f,
|
||||
const struct nvgpu_device *dev)
|
||||
{
|
||||
bool found;
|
||||
struct nvgpu_device *dev_rw;
|
||||
struct gk20a *g = f->g;
|
||||
|
||||
dev_rw = (struct nvgpu_device *)dev;
|
||||
|
||||
/*
|
||||
* Populate the PBDMA info for this device; ideally it'd be done
|
||||
* during device init, but the FIFO unit is not out of reset that
|
||||
* early in the nvgpu_finalize_poweron() sequence.
|
||||
*
|
||||
* We only need to do this for native; vGPU already has pbdma_id
|
||||
* populated during device initialization.
|
||||
*/
|
||||
if (g->ops.fifo.find_pbdma_for_runlist != NULL) {
|
||||
found = g->ops.fifo.find_pbdma_for_runlist(g,
|
||||
dev->runlist_id,
|
||||
&dev_rw->pbdma_id);
|
||||
if (!found) {
|
||||
nvgpu_err(g, "busted pbdma map");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_NEXT)
|
||||
{
|
||||
int err = nvgpu_next_engine_init_one_dev(g, dev);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
f->host_engines[dev->engine_id] = dev;
|
||||
f->active_engines[f->num_engines] = dev;
|
||||
++f->num_engines;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_engine_init_info(struct nvgpu_fifo *f)
|
||||
{
|
||||
int err;
|
||||
struct gk20a *g = f->g;
|
||||
const struct nvgpu_device *dev;
|
||||
|
||||
f->num_engines = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list");
|
||||
nvgpu_log(g, gpu_dbg_device, " GFX devices: %u",
|
||||
nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS));
|
||||
|
||||
nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) {
|
||||
err = nvgpu_engine_init_one_dev(f, dev);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return g->ops.engine.init_ce_info(f);
|
||||
}
|
||||
|
||||
void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id,
|
||||
u32 *id, u32 *type)
|
||||
{
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
|
||||
g->ops.engine_status.read_engine_status_info(g, engine_id,
|
||||
&engine_status);
|
||||
|
||||
/* use next_id if context load is failing */
|
||||
if (nvgpu_engine_status_is_ctxsw_load(
|
||||
&engine_status)) {
|
||||
nvgpu_engine_status_get_next_ctx_id_type(
|
||||
&engine_status, id, type);
|
||||
} else {
|
||||
nvgpu_engine_status_get_ctx_id_type(
|
||||
&engine_status, id, type);
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
|
||||
u32 *id_ptr, bool *is_tsg_ptr)
|
||||
{
|
||||
u32 i;
|
||||
u32 id = U32_MAX;
|
||||
bool is_tsg = false;
|
||||
u32 mailbox2;
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
const struct nvgpu_device *dev = NULL;
|
||||
|
||||
for (i = 0U; i < g->fifo.num_engines; i++) {
|
||||
dev = g->fifo.active_engines[i];
|
||||
|
||||
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
|
||||
&engine_status);
|
||||
|
||||
/*
|
||||
* we are interested in busy engines that
|
||||
* are doing context switch
|
||||
*/
|
||||
if (!engine_status.is_busy ||
|
||||
!nvgpu_engine_status_is_ctxsw(&engine_status)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
|
||||
id = engine_status.ctx_next_id;
|
||||
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
|
||||
&engine_status);
|
||||
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
|
||||
mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
|
||||
NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2);
|
||||
if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) {
|
||||
id = engine_status.ctx_next_id;
|
||||
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
|
||||
&engine_status);
|
||||
} else {
|
||||
id = engine_status.ctx_id;
|
||||
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
|
||||
&engine_status);
|
||||
}
|
||||
} else {
|
||||
id = engine_status.ctx_id;
|
||||
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
|
||||
&engine_status);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
*id_ptr = id;
|
||||
*is_tsg_ptr = is_tsg;
|
||||
|
||||
return dev->engine_id;
|
||||
}
|
||||
|
||||
u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 i, eng_bitmask = 0U;
|
||||
struct nvgpu_engine_status_info engine_status;
|
||||
|
||||
for (i = 0U; i < f->num_engines; i++) {
|
||||
const struct nvgpu_device *dev = f->active_engines[i];
|
||||
|
||||
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
|
||||
&engine_status);
|
||||
|
||||
if (engine_status.is_busy && (dev->runlist_id == runlist_id)) {
|
||||
eng_bitmask |= BIT32(dev->engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
return eng_bitmask;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
|
||||
u32 engine_subid, bool fake_fault)
|
||||
{
|
||||
const struct nvgpu_device *dev;
|
||||
|
||||
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
|
||||
if (dev == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* channel recovery is only deferred if an sm debugger
|
||||
	 * is attached and MMU debug mode is enabled
|
||||
*/
|
||||
if (!g->ops.gr.sm_debugger_attached(g) ||
|
||||
!g->ops.fb.is_debug_mode_enabled(g)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* if this fault is fake (due to RC recovery), don't defer recovery */
|
||||
if (fake_fault) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dev->type != NVGPU_DEVTYPE_GRAPHICS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id,
|
||||
u32 gr_eng_fault_id)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 num_subctx;
|
||||
u32 veid = INVAL_ID;
|
||||
|
||||
num_subctx = f->max_subctx_count;
|
||||
|
||||
if ((mmu_fault_id >= gr_eng_fault_id) &&
|
||||
(mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id,
|
||||
num_subctx))) {
|
||||
veid = mmu_fault_id - gr_eng_fault_id;
|
||||
}
|
||||
|
||||
return veid;
|
||||
}
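
/*
 * Worked example with hypothetical values: if gr_eng_fault_id is 0x100 and
 * max_subctx_count is 64, then mmu_fault_id 0x102 yields veid 2, while any
 * fault id outside [0x100, 0x140) returns INVAL_ID.
 */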
|
||||
|
||||
static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g,
|
||||
u32 mmu_fault_id, u32 *veid)
|
||||
{
|
||||
u32 i;
|
||||
u32 engine_id = INVAL_ID;
|
||||
const struct nvgpu_device *dev;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
for (i = 0U; i < f->num_engines; i++) {
|
||||
dev = f->active_engines[i];
|
||||
|
||||
if (dev->type == NVGPU_DEVTYPE_GRAPHICS) {
|
||||
*veid = nvgpu_engine_mmu_fault_id_to_veid(g,
|
||||
mmu_fault_id, dev->fault_id);
|
||||
if (*veid != INVAL_ID) {
|
||||
engine_id = dev->engine_id;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (dev->fault_id == mmu_fault_id) {
|
||||
engine_id = dev->engine_id;
|
||||
*veid = INVAL_ID;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return engine_id;
|
||||
}
|
||||
|
||||
void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g,
|
||||
u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id)
|
||||
{
|
||||
*engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g,
|
||||
mmu_fault_id, veid);
|
||||
|
||||
if (*engine_id == INVAL_ID) {
|
||||
*pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g,
|
||||
mmu_fault_id);
|
||||
} else {
|
||||
*pbdma_id = INVAL_ID;
|
||||
}
|
||||
}
|
||||
315
drivers/gpu/nvgpu/common/fifo/fifo.c
Normal file
@@ -0,0 +1,315 @@
|
||||
/*
|
||||
* FIFO
|
||||
*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/trace.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/pbdma.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/vm_area.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/mc.h>
|
||||
#include <nvgpu/swprofile.h>
|
||||
#include <nvgpu/fifo/swprofile.h>
|
||||
|
||||
static const char *nvgpu_fifo_kickoff_profile_events[] = {
|
||||
NVGPU_FIFO_KICKOFF_PROFILE_EVENTS,
|
||||
};
|
||||
|
||||
static const char *nvgpu_fifo_recovery_profile_events[] = {
|
||||
NVGPU_FIFO_RECOVERY_PROFILE_EVENTS,
|
||||
};
|
||||
|
||||
static const char *nvgpu_fifo_engine_reset_events[] = {
|
||||
NVGPU_FIFO_ENGINE_RESET_EVENTS,
|
||||
};
|
||||
|
||||
void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
g->ops.userd.cleanup_sw(g);
|
||||
#endif
|
||||
nvgpu_channel_cleanup_sw(g);
|
||||
nvgpu_tsg_cleanup_sw(g);
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
nvgpu_engine_cleanup_sw(g);
|
||||
if (g->ops.pbdma.cleanup_sw != NULL) {
|
||||
g->ops.pbdma.cleanup_sw(g);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
f->deferred_reset_pending = false;
|
||||
nvgpu_mutex_destroy(&f->deferred_reset_mutex);
|
||||
#endif
|
||||
nvgpu_mutex_destroy(&f->engines_reset_mutex);
|
||||
nvgpu_mutex_destroy(&f->intr.isr.mutex);
|
||||
|
||||
f->sw_ready = false;
|
||||
}
|
||||
|
||||
void nvgpu_fifo_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
nvgpu_channel_worker_deinit(g);
|
||||
#endif
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
}
|
||||
|
||||
static void nvgpu_fifo_remove_support(struct nvgpu_fifo *f)
|
||||
{
|
||||
struct gk20a *g = f->g;
|
||||
|
||||
g->ops.fifo.cleanup_sw(g);
|
||||
}
|
||||
|
||||
int nvgpu_fifo_setup_sw_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
f->g = g;
|
||||
|
||||
nvgpu_mutex_init(&f->intr.isr.mutex);
|
||||
nvgpu_mutex_init(&f->engines_reset_mutex);
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
nvgpu_mutex_init(&f->deferred_reset_mutex);
|
||||
#endif
|
||||
|
||||
nvgpu_swprofile_initialize(g, &f->kickoff_profiler,
|
||||
nvgpu_fifo_kickoff_profile_events);
|
||||
nvgpu_swprofile_initialize(g, &f->recovery_profiler,
|
||||
nvgpu_fifo_recovery_profile_events);
|
||||
nvgpu_swprofile_initialize(g, &f->eng_reset_profiler,
|
||||
nvgpu_fifo_engine_reset_events);
|
||||
|
||||
|
||||
err = nvgpu_channel_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init channel support");
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
err = nvgpu_tsg_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init tsg support");
|
||||
goto clean_up_channel;
|
||||
}
|
||||
|
||||
if (g->ops.pbdma.setup_sw != NULL) {
|
||||
err = g->ops.pbdma.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init pbdma support");
|
||||
goto clean_up_tsg;
|
||||
}
|
||||
}
|
||||
|
||||
err = nvgpu_engine_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init engine support");
|
||||
goto clean_up_pbdma;
|
||||
}
|
||||
|
||||
err = nvgpu_runlist_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init runlist support");
|
||||
goto clean_up_engine;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
err = g->ops.userd.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init userd support");
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
#endif
|
||||
|
||||
f->remove_support = nvgpu_fifo_remove_support;
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
clean_up_runlist:
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
#endif
|
||||
|
||||
clean_up_engine:
|
||||
nvgpu_engine_cleanup_sw(g);
|
||||
|
||||
clean_up_pbdma:
|
||||
if (g->ops.pbdma.cleanup_sw != NULL) {
|
||||
g->ops.pbdma.cleanup_sw(g);
|
||||
}
|
||||
|
||||
clean_up_tsg:
|
||||
nvgpu_tsg_cleanup_sw(g);
|
||||
|
||||
clean_up_channel:
|
||||
nvgpu_channel_cleanup_sw(g);
|
||||
|
||||
clean_up:
|
||||
nvgpu_err(g, "init fifo support failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_fifo_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (f->sw_ready) {
|
||||
nvgpu_log_fn(g, "skip init");
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_fifo_setup_sw_common(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo common sw setup failed, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
err = nvgpu_channel_worker_init(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "worker init fail, err=%d", err);
|
||||
goto clean_up;
|
||||
}
|
||||
#endif
|
||||
|
||||
f->sw_ready = true;
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
clean_up:
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
|
||||
return err;
|
||||
#endif
|
||||
}
|
||||
|
||||
int nvgpu_fifo_init_support(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = g->ops.fifo.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo sw setup failed, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (g->ops.fifo.init_fifo_setup_hw != NULL) {
|
||||
err = g->ops.fifo.init_fifo_setup_hw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo hw setup failed, err=%d", err);
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char * const pbdma_ch_eng_status_str[] = {
|
||||
"invalid",
|
||||
"valid",
|
||||
"NA",
|
||||
"NA",
|
||||
"NA",
|
||||
"load",
|
||||
"save",
|
||||
"switch",
|
||||
};
|
||||
|
||||
static const char * const not_found_str[] = {
|
||||
"NOT FOUND"
|
||||
};
|
||||
|
||||
const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index)
|
||||
{
|
||||
if (index >= ARRAY_SIZE(pbdma_ch_eng_status_str)) {
|
||||
return not_found_str[0];
|
||||
} else {
|
||||
return pbdma_ch_eng_status_str[index];
|
||||
}
|
||||
}
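
/*
 * Example: index 1 decodes to "valid" and index 5 to "load"; any index past
 * the end of the table (8 or larger) decodes to "NOT FOUND".
 */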
|
||||
|
||||
static void disable_fifo_interrupts(struct gk20a *g)
|
||||
{
|
||||
	/* Disable fifo interrupts */
|
||||
g->ops.fifo.intr_0_enable(g, false);
|
||||
g->ops.fifo.intr_1_enable(g, false);
|
||||
|
||||
if (g->ops.fifo.intr_top_enable == NULL) {
|
||||
nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
|
||||
NVGPU_CIC_INTR_DISABLE);
|
||||
nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
|
||||
NVGPU_CIC_INTR_DISABLE);
|
||||
} else {
|
||||
g->ops.fifo.intr_top_enable(g, NVGPU_CIC_INTR_DISABLE);
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_fifo_suspend(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (g->ops.mm.is_bar1_supported(g)) {
|
||||
g->ops.fifo.bar1_snooping_disable(g);
|
||||
}
|
||||
|
||||
disable_fifo_interrupts(g);
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_fifo_sw_quiesce(struct gk20a *g)
|
||||
{
|
||||
u32 runlist_mask = U32_MAX;
|
||||
|
||||
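	/* U32_MAX selects every runlist; disable them all before preempting. */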
g->ops.runlist.write_state(g, runlist_mask, RUNLIST_DISABLED);
|
||||
|
||||
/* Preempt all runlists */
|
||||
nvgpu_fifo_preempt_runlists_for_rc(g, runlist_mask);
|
||||
}
|
||||
149
drivers/gpu/nvgpu/common/fifo/job.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/job.h>
|
||||
#include <nvgpu/priv_cmdbuf.h>
|
||||
#include <nvgpu/fence.h>
|
||||
|
||||
static inline struct nvgpu_channel_job *
|
||||
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_channel_job *)
|
||||
((uintptr_t)node - offsetof(struct nvgpu_channel_job, list));
|
||||
};
|
||||
|
||||
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job **job_out)
|
||||
{
|
||||
unsigned int put = c->joblist.pre_alloc.put;
|
||||
unsigned int get = c->joblist.pre_alloc.get;
|
||||
unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
|
||||
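	/*
	 * The ring is full when advancing put would catch up with get, so a
	 * queue of length L holds at most L - 1 jobs; the extra slot is
	 * accounted for at allocation time in nvgpu_channel_joblist_init().
	 */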
bool full = next == get;
|
||||
|
||||
if (full) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
*job_out = &c->joblist.pre_alloc.jobs[put];
|
||||
(void) memset(*job_out, 0, sizeof(**job_out));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_channel_free_job(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
/*
|
||||
* Nothing needed for now. The job contents are preallocated. The
|
||||
* completion fence may briefly outlive the job, but the job memory is
|
||||
	 * reclaimed only when a new submit comes in and the ringbuffer has run
|
||||
* out of space.
|
||||
*/
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c)
|
||||
{
|
||||
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c)
|
||||
{
|
||||
nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
|
||||
}
|
||||
|
||||
struct nvgpu_channel_job *nvgpu_channel_joblist_peek(struct nvgpu_channel *c)
|
||||
{
|
||||
unsigned int get = c->joblist.pre_alloc.get;
|
||||
unsigned int put = c->joblist.pre_alloc.put;
|
||||
bool empty = get == put;
|
||||
|
||||
return empty ? NULL : &c->joblist.pre_alloc.jobs[get];
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_add(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
|
||||
(c->joblist.pre_alloc.length);
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_delete(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
|
||||
(c->joblist.pre_alloc.length);
|
||||
}
|
||||
|
||||
int nvgpu_channel_joblist_init(struct nvgpu_channel *c, u32 num_jobs)
|
||||
{
|
||||
int err;
|
||||
u32 size;
|
||||
|
||||
size = (u32)sizeof(struct nvgpu_channel_job);
|
||||
if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) {
|
||||
err = -ERANGE;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/*
|
||||
* The max capacity of this ring buffer is the alloc size minus one (in
|
||||
* units of item slot), so allocate a size of (num_jobs + 1) * size
|
||||
* bytes.
|
||||
*/
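	/*
	 * Hypothetical example: num_jobs = 64 with a 128-byte job struct
	 * allocates (64 + 1) * 128 = 8320 bytes, of which 64 slots are
	 * usable at any given time.
	 */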
|
||||
c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_safe_add_u32(num_jobs, 1U),
|
||||
size));
|
||||
if (c->joblist.pre_alloc.jobs == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/*
|
||||
* length is the allocation size of the ringbuffer; the number of jobs
|
||||
* that fit is one less.
|
||||
*/
|
||||
c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U);
|
||||
c->joblist.pre_alloc.put = 0;
|
||||
c->joblist.pre_alloc.get = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
|
||||
(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_deinit(struct nvgpu_channel *c)
|
||||
{
|
||||
if (c->joblist.pre_alloc.jobs != NULL) {
|
||||
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
|
||||
c->joblist.pre_alloc.jobs = NULL;
|
||||
}
|
||||
}
|
||||
55
drivers/gpu/nvgpu/common/fifo/pbdma.c
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/pbdma.h>
|
||||
|
||||
static void nvgpu_pbdma_init_intr_descs(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
if (g->ops.pbdma.device_fatal_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.device_fatal_0 =
|
||||
g->ops.pbdma.device_fatal_0_intr_descs();
|
||||
}
|
||||
|
||||
if (g->ops.pbdma.channel_fatal_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.channel_fatal_0 =
|
||||
g->ops.pbdma.channel_fatal_0_intr_descs();
|
||||
}
|
||||
if (g->ops.pbdma.restartable_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.restartable_0 =
|
||||
g->ops.pbdma.restartable_0_intr_descs();
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_pbdma_setup_sw(struct gk20a *g)
|
||||
{
|
||||
nvgpu_pbdma_init_intr_descs(g);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
return;
|
||||
}
|
||||
54
drivers/gpu/nvgpu/common/fifo/pbdma_status.c
Normal file
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/pbdma_status.h>

bool nvgpu_pbdma_status_is_chsw_switch(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SWITCH;
}

bool nvgpu_pbdma_status_is_chsw_load(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_LOAD;
}

bool nvgpu_pbdma_status_is_chsw_save(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SAVE;
}

bool nvgpu_pbdma_status_is_chsw_valid(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_VALID;
}

bool nvgpu_pbdma_status_is_id_type_tsg(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->id_type == PBDMA_STATUS_ID_TYPE_TSGID;
}

bool nvgpu_pbdma_status_is_next_id_type_tsg(struct nvgpu_pbdma_status_info
		*pbdma_status)
{
	return pbdma_status->next_id_type == PBDMA_STATUS_NEXT_ID_TYPE_TSGID;
}
220
drivers/gpu/nvgpu/common/fifo/preempt.c
Normal file
@@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/soc.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/errata.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/mutex.h>
|
||||
#endif
|
||||
|
||||
u32 nvgpu_preempt_get_timeout(struct gk20a *g)
|
||||
{
|
||||
return g->ctxsw_timeout_period_ms;
|
||||
}
|
||||
|
||||
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 preempt_retry_count = 10U;
|
||||
u32 preempt_retry_timeout =
|
||||
nvgpu_preempt_get_timeout(g) / preempt_retry_count;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
|
||||
nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
|
||||
|
||||
if (tsg->runlist == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
do {
|
||||
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
|
||||
|
||||
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
||||
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
|
||||
RUNLIST_DISABLED);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
|
||||
|
||||
/*
|
||||
* Poll for preempt done. if stalling interrupts are pending
|
||||
* while preempt is in progress we poll for stalling interrupts
|
||||
* to finish based on return value from this function and
|
||||
* retry preempt again.
|
||||
* If HW is hung, on the last retry instance we try to identify
|
||||
* the engines hung and set the runlist reset_eng_bitmask
|
||||
* and mark preemption completion.
|
||||
*/
|
||||
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
|
||||
ID_TYPE_TSG, preempt_retry_count > 1U);
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
int err = nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
||||
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
|
||||
RUNLIST_ENABLED);
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
|
||||
|
||||
if (ret != -EAGAIN) {
|
||||
break;
|
||||
}
|
||||
|
||||
ret = nvgpu_cic_wait_for_stall_interrupts(g, preempt_retry_timeout);
|
||||
if (ret != 0) {
|
||||
nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
|
||||
}
|
||||
} while (--preempt_retry_count != 0U);
|
||||
|
||||
if (ret != 0) {
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
nvgpu_err(g, "preempt timed out for tsgid: %u, "
|
||||
"ctxsw timeout will trigger recovery if needed",
|
||||
tsg->tsgid);
|
||||
} else {
|
||||
nvgpu_rc_preempt_timeout(g, tsg);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch)
|
||||
{
|
||||
int err;
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
|
||||
|
||||
if (tsg != NULL) {
|
||||
err = g->ops.fifo.preempt_tsg(ch->g, tsg);
|
||||
} else {
|
||||
err = g->ops.fifo.preempt_channel(ch->g, ch);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* called from rc */
|
||||
int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g,
|
||||
struct nvgpu_tsg *tsg)
|
||||
{
|
||||
unsigned long runlist_served_pbdmas;
|
||||
unsigned long pbdma_id_bit;
|
||||
u32 tsgid, pbdma_id;
|
||||
|
||||
if (g->ops.fifo.preempt_poll_pbdma == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
tsgid = tsg->tsgid;
|
||||
runlist_served_pbdmas = tsg->runlist->pbdma_bitmask;
|
||||
|
||||
for_each_set_bit(pbdma_id_bit, &runlist_served_pbdmas,
|
||||
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
|
||||
pbdma_id = U32(pbdma_id_bit);
|
||||
/*
|
||||
* If pbdma preempt fails the only option is to reset
|
||||
* GPU. Any sort of hang indicates the entire GPU’s
|
||||
* memory system would be blocked.
|
||||
*/
|
||||
if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) {
|
||||
nvgpu_err(g, "PBDMA preempt failed");
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This should be called with runlist_lock held for all the
|
||||
* runlists set in runlists_mask
|
||||
*/
|
||||
void nvgpu_fifo_preempt_runlists_for_rc(struct gk20a *g, u32 runlists_bitmask)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 i;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
|
||||
/* runlist_lock are locked by teardown and sched are disabled too */
|
||||
nvgpu_log_fn(g, "preempt runlists_bitmask:0x%08x", runlists_bitmask);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
|
||||
for (i = 0U; i < f->num_runlists; i++) {
|
||||
struct nvgpu_runlist *runlist;
|
||||
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
if ((BIT32(runlist->id) & runlists_bitmask) == 0U) {
|
||||
continue;
|
||||
}
|
||||
/* issue runlist preempt */
|
||||
g->ops.fifo.preempt_trigger(g, runlist->id,
|
||||
ID_TYPE_RUNLIST);
|
||||
#ifdef CONFIG_NVGPU_RECOVERY
|
||||
/*
|
||||
* Preemption will never complete in RC due to some
|
||||
* fatal condition. Do not poll for preemption to
|
||||
* complete. Reset engines served by runlists.
|
||||
*/
|
||||
runlist->reset_eng_bitmask = runlist->eng_bitmask;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
|
||||
&token);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
|
||||
err);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
333
drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
Normal file
@@ -0,0 +1,333 @@
/*
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/utils.h>
#include <nvgpu/log2.h>
#include <nvgpu/barrier.h>
#include <nvgpu/dma.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vm.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/trace.h>
#include <nvgpu/circ_buf.h>

struct priv_cmd_entry {
	struct nvgpu_mem *mem;
	u32 off;	/* offset in mem, in u32 entries */
	u32 fill_off;	/* write offset from off, in u32 entries */
	u32 size;	/* in words */
	u32 alloc_size;
};

struct priv_cmd_queue {
	struct vm_gk20a *vm;
	struct nvgpu_mem mem;	/* pushbuf */
	u32 size;	/* allocated length in words */
	u32 put;	/* next entry will begin here */
	u32 get;	/* next entry to free begins here */

	/* an entry is a fragment of the pushbuf memory */
	struct priv_cmd_entry *entries;
	u32 entries_len;	/* allocated length */
	u32 entry_put;
	u32 entry_get;
};

/*
 * Allocate the private cmd buffer queue, used for inserting commands
 * before/after user submitted buffers.
 */
int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
	u32 job_count, struct priv_cmd_queue **queue)
{
	struct gk20a *g = vm->mm->g;
	struct priv_cmd_queue *q;
	u64 size, tmp_size;
	int err = 0;
	u32 wait_size, incr_size;
	u32 mem_per_job;

	/*
	 * The sema size is at least as much as the syncpt size, but semas may
	 * not be enabled in the build. If neither semas nor syncpts are
	 * enabled, priv cmdbufs, and thus kernel mode submits with job
	 * tracking, are not supported.
	 */
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
	wait_size = g->ops.sync.sema.get_wait_cmd_size();
	incr_size = g->ops.sync.sema.get_incr_cmd_size();
#else
	wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
	incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
#endif

	/*
	 * Compute the amount of priv_cmdbuf space we need. In general the
	 * worst case is the kernel inserting both a semaphore pre-fence and
	 * post-fence. Any syncpt fences will take less memory so we can
	 * ignore them unless they're the only supported type. Jobs can also
	 * have more than one pre-fence, but that's abnormal and we'll return
	 * -EAGAIN if such jobs would fill the queue.
	 *
	 * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
	 * 10 words: all the same as an ACQ plus a non-stalling intr which is
	 * another 2 words. In reality these numbers vary by chip but we'll use
	 * 8 and 10 as examples.
	 *
	 * Given the job count, cmdbuf space is allocated such that each job
	 * can get one wait command and one increment command:
	 *
	 *   job_count * (8 + 10) * 4 bytes
	 *
	 * These cmdbufs are inserted as gpfifo entries right before and after
	 * the user submitted gpfifo entries per submit.
	 *
	 * One extra slot is added to the queue length so that the requested
	 * job count can actually be allocated. This ring buffer implementation
	 * is full when the number of consumed entries is one less than the
	 * allocation size:
	 *
	 *   alloc bytes = job_count * (wait + incr + 1) * slot in bytes
	 */
	mem_per_job = nvgpu_safe_mult_u32(
			nvgpu_safe_add_u32(
				nvgpu_safe_add_u32(wait_size, incr_size),
				1U),
			(u32)sizeof(u32));
	/* both operands are 32 bit and mem_per_job is small */
	size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);

	tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
	if (tmp_size > U32_MAX) {
		return -ERANGE;
	}
	size = (u32)tmp_size;

	q = nvgpu_kzalloc(g, sizeof(*q));
	if (q == NULL) {
		return -ENOMEM;
	}

	q->vm = vm;

	if (job_count > U32_MAX / 2U - 1U) {
		err = -ERANGE;
		goto err_free_queue;
	}

	/* One extra to account for the full condition: 2 * job_count + 1 */
	q->entries_len = nvgpu_safe_mult_u32(2U,
			nvgpu_safe_add_u32(job_count, 1U));
	q->entries = nvgpu_vzalloc(g,
			nvgpu_safe_mult_u64((u64)q->entries_len,
				sizeof(*q->entries)));
	if (q->entries == NULL) {
		err = -ENOMEM;
		goto err_free_queue;
	}

	err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem);
	if (err != 0) {
		nvgpu_err(g, "%s: memory allocation failed", __func__);
		goto err_free_entries;
	}

	tmp_size = q->mem.size / sizeof(u32);
	nvgpu_assert(tmp_size <= U32_MAX);
	q->size = (u32)tmp_size;

	*queue = q;
	return 0;
err_free_entries:
	nvgpu_vfree(g, q->entries);
err_free_queue:
	nvgpu_kfree(g, q);
	return err;
}

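/*
 * Illustrative sketch, not part of the original file: with the example
 * sizes from the comment above (wait_size = 8 words, incr_size = 10 words),
 * mem_per_job = (8 + 10 + 1) * 4 = 76 bytes. A hypothetical job_count of
 * 128 then gives size = 128 * 76 = 9728 bytes, which roundup_pow_of_two()
 * turns into 16384 and PAGE_ALIGN() leaves at 16 KiB on a 4 KiB page
 * system. The same arithmetic as plain C:
 */
#if 0 /* example only, never compiled */
static u64 example_priv_cmdbuf_queue_size(u32 job_count, u32 wait_size,
		u32 incr_size)
{
	u32 mem_per_job = (wait_size + incr_size + 1U) * (u32)sizeof(u32);
	u64 size = (u64)job_count * (u64)mem_per_job;

	/* the wrap-around masks in the alloc path rely on a power of two */
	return PAGE_ALIGN(roundup_pow_of_two(size));
}
#endif
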
void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q)
{
	struct vm_gk20a *vm = q->vm;
	struct gk20a *g = vm->mm->g;

	nvgpu_dma_unmap_free(vm, &q->mem);
	nvgpu_vfree(g, q->entries);
	nvgpu_kfree(g, q);
}

/* allocate a cmd buffer with the given size; size is a number of u32 entries */
static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size,
		struct priv_cmd_entry *e)
{
	struct gk20a *g = q->vm->mm->g;
	u32 size = orig_size;
	u32 free_count;

	nvgpu_log_fn(g, "size %d", orig_size);

	/*
	 * If the free space at the end is less than requested, increase the
	 * size so that the actual allocation starts from the beginning of the
	 * buffer. The hardware expects each cmdbuf to be contiguous in the
	 * dma space.
	 *
	 * The leftover space at the end can be too small because the
	 * requested wait and incr command buffers do not necessarily align
	 * with the whole buffer capacity: the buffer size is rounded up to
	 * the next power of two, and not all jobs necessarily use exactly one
	 * wait command.
	 */
	if (nvgpu_safe_add_u32(q->put, size) > q->size) {
		size = orig_size + (q->size - q->put);
	}

	nvgpu_log_info(g, "priv cmd queue get:put %d:%d",
			q->get, q->put);

	nvgpu_assert(q->put < q->size);
	nvgpu_assert(q->get < q->size);
	nvgpu_assert(q->size > 0U);
	free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U);

	if (size > free_count) {
		return -EAGAIN;
	}

	e->fill_off = 0;
	e->size = orig_size;
	e->alloc_size = size;
	e->mem = &q->mem;

	/*
	 * If we have increased the size to skip the free space at the end,
	 * set put to the beginning of the cmd buffer plus size, as if the
	 * previous put was at position 0.
	 */
	if (size != orig_size) {
		e->off = 0;
		q->put = orig_size;
	} else {
		e->off = q->put;
		q->put = (q->put + orig_size) & (q->size - 1U);
	}

	/* we already handled q->put + size > q->size so BUG_ON this */
	BUG_ON(q->put > q->size);

	nvgpu_log_fn(g, "done");

	return 0;
}

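/*
 * Illustrative note, not part of the original file: the free_count
 * expression above is the usual power-of-two ring buffer computation that
 * keeps one word unused so that put == get always means "empty". For
 * example, with q->size = 1024, q->put = 1000 and q->get = 100,
 * (1024 - 1000 + 100 - 1) & 1023 = 123 words are free; with the pointers
 * swapped (put = 100, get = 1000) the same expression yields 899.
 */
#if 0 /* example only, never compiled */
static u32 example_priv_cmdbuf_free_words(u32 size, u32 put, u32 get)
{
	/* size must be a power of two; one word is deliberately kept unused */
	return (size - put + get - 1U) & (size - 1U);
}
#endif
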
int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
|
||||
struct priv_cmd_entry **e)
|
||||
{
|
||||
u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
|
||||
struct priv_cmd_entry *entry;
|
||||
int err;
|
||||
|
||||
if (next_put == q->entry_get) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
entry = &q->entries[q->entry_put];
|
||||
|
||||
err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
q->entry_put = next_put;
|
||||
*e = entry;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
|
||||
struct priv_cmd_entry *e)
|
||||
{
|
||||
nvgpu_assert(q->put < q->size);
|
||||
nvgpu_assert(q->size > 0U);
|
||||
nvgpu_assert(e->alloc_size <= q->size);
|
||||
q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U);
|
||||
|
||||
(void)memset(e, 0, sizeof(*e));
|
||||
|
||||
nvgpu_assert(q->entry_put < q->entries_len);
|
||||
nvgpu_assert(q->entries_len > 0U);
|
||||
q->entry_put = (q->entry_put + q->entries_len - 1U)
|
||||
% q->entries_len;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
|
||||
{
|
||||
struct gk20a *g = q->vm->mm->g;
|
||||
|
||||
if ((q->get != e->off) && e->off != 0U) {
|
||||
nvgpu_err(g, "priv cmdbuf requests out-of-order");
|
||||
}
|
||||
nvgpu_assert(q->size > 0U);
|
||||
q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
|
||||
q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U) % q->entries_len;
|
||||
|
||||
(void)memset(e, 0, sizeof(*e));
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u32 *data, u32 entries)
|
||||
{
|
||||
nvgpu_assert(e->fill_off + entries <= e->size);
|
||||
nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
|
||||
data, entries * sizeof(u32));
|
||||
e->fill_off += entries;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u32 entries)
|
||||
{
|
||||
nvgpu_assert(e->fill_off + entries <= e->size);
|
||||
nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
|
||||
0, entries * sizeof(u32));
|
||||
e->fill_off += entries;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u64 *gva, u32 *size)
|
||||
{
|
||||
/*
|
||||
* The size is written to the pushbuf entry, so make sure this buffer
|
||||
* is complete at this point. The responsibility of the channel sync is
|
||||
* to be consistent in allocation and usage, and the matching size and
|
||||
* add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there.
|
||||
*/
|
||||
nvgpu_assert(e->fill_off == e->size);
|
||||
|
||||
#ifdef CONFIG_NVGPU_TRACE
|
||||
if (e->mem->aperture == APERTURE_SYSMEM) {
|
||||
trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0,
|
||||
(u32 *)e->mem->cpu_va + e->off);
|
||||
}
|
||||
#endif
|
||||
*gva = nvgpu_safe_add_u64(e->mem->gpu_va,
|
||||
nvgpu_safe_mult_u64((u64)e->off, sizeof(u32)));
|
||||
*size = e->size;
|
||||
}
|
||||
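/*
 * Illustrative usage sketch, not part of the original file: a caller such
 * as the channel sync or submit code pairs the get_*_cmd_size() and
 * append helpers so that each entry is filled to exactly the size it was
 * allocated with, which is what the fill_off == size assert in
 * nvgpu_priv_cmdbuf_finish() checks.
 */
#if 0 /* example only, never compiled */
static int example_priv_cmdbuf_lifecycle(struct gk20a *g,
		struct priv_cmd_queue *q, u32 *words, u32 count)
{
	struct priv_cmd_entry *e = NULL;
	u64 gva;
	u32 size;
	int err;

	err = nvgpu_priv_cmdbuf_alloc(q, count, &e);
	if (err != 0) {
		/* typically -EAGAIN: retry once older jobs are cleaned up */
		return err;
	}

	nvgpu_priv_cmdbuf_append(g, e, words, count);

	/* gva and size are what the submit path writes into a gpfifo entry */
	nvgpu_priv_cmdbuf_finish(g, e, &gva, &size);

	/*
	 * On a later error, before submission, the caller would instead call
	 * nvgpu_priv_cmdbuf_rollback(q, e) to return the space.
	 */
	return 0;
}
#endif
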
914
drivers/gpu/nvgpu/common/fifo/runlist.c
Normal file
@@ -0,0 +1,914 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/device.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/ptimer.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/mutex.h>
|
||||
#endif
|
||||
|
||||
void nvgpu_runlist_lock_active_runlists(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
u32 i;
|
||||
|
||||
nvgpu_log_info(g, "acquire runlist_lock for active runlists");
|
||||
for (i = 0; i < g->fifo.num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
nvgpu_mutex_acquire(&runlist->runlist_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_runlist_unlock_active_runlists(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
u32 i;
|
||||
|
||||
nvgpu_log_info(g, "release runlist_lock for active runlists");
|
||||
for (i = 0; i < g->fifo.num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
nvgpu_mutex_release(&runlist->runlist_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left,
|
||||
struct nvgpu_tsg *tsg)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32);
|
||||
struct nvgpu_channel *ch;
|
||||
u32 count = 0;
|
||||
u32 timeslice;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
if (*entries_left == 0U) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
|
||||
/* add TSG entry */
|
||||
nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
|
||||
|
||||
/*
|
||||
* timeslice is measured with PTIMER.
|
||||
* On some platforms, PTIMER is lower than 1GHz.
|
||||
*/
|
||||
timeslice = scale_ptimer(tsg->timeslice_us,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
|
||||
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
|
||||
|
||||
nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x",
|
||||
*entries_left,
|
||||
(*runlist_entry)[0], (*runlist_entry)[1]);
|
||||
*runlist_entry += runlist_entry_words;
|
||||
count++;
|
||||
(*entries_left)--;
|
||||
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
/* add runnable channels bound to this TSG */
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
|
||||
nvgpu_channel, ch_entry) {
|
||||
if (!nvgpu_test_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*entries_left == 0U) {
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "add channel %d to runlist",
|
||||
ch->chid);
|
||||
g->ops.runlist.get_ch_entry(ch, *runlist_entry);
|
||||
nvgpu_log_info(g, "rl entries left %d runlist [0] %x [1] %x",
|
||||
*entries_left,
|
||||
(*runlist_entry)[0], (*runlist_entry)[1]);
|
||||
count = nvgpu_safe_add_u32(count, 1U);
|
||||
*runlist_entry += runlist_entry_words;
|
||||
(*entries_left)--;
|
||||
}
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static u32 nvgpu_runlist_append_prio(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left,
|
||||
u32 interleave_level)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level == interleave_level) {
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_hi(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
/*
|
||||
* No higher levels - this is where the "recursion" ends; just add all
|
||||
* active TSGs at this level.
|
||||
*/
|
||||
return nvgpu_runlist_append_prio(f, runlist, runlist_entry,
|
||||
entries_left,
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH);
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_med(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level !=
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* LEVEL_MEDIUM list starts with a LEVEL_HIGH, if any */
|
||||
|
||||
entries = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_low(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level !=
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* The medium level starts with the highs, if any. */
|
||||
|
||||
entries = nvgpu_runlist_append_med(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
if (count == 0U) {
|
||||
/*
|
||||
* No transitions to fill with higher levels, so add
|
||||
* the next level once. If that's empty too, we have only
|
||||
* LEVEL_HIGH jobs.
|
||||
*/
|
||||
count = nvgpu_runlist_append_med(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (count == 0U) {
|
||||
count = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
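/*
 * Illustrative note, not part of the original file: with interleaving
 * enabled the three append levels above nest, so higher-priority TSGs
 * reappear between every lower-priority entry. For example, with one high
 * TSG h1, one medium TSG m1 and two low TSGs l1 and l2, the constructed
 * runlist order is:
 *
 *   h1 m1 h1 l1 h1 m1 h1 l2
 *
 * i.e. every low entry is preceded by a full medium pass and every medium
 * entry by a full high pass, so high TSGs are scheduled most often.
 */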
static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0, entries, i;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
/* Group by priority but don't interleave. High comes first. */
|
||||
|
||||
for (i = 0; i < NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS; i++) {
|
||||
u32 level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH - i;
|
||||
|
||||
entries = nvgpu_runlist_append_prio(f, runlist, runlist_entry,
|
||||
entries_left, level);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 buf_id,
|
||||
u32 max_entries)
|
||||
{
|
||||
u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va;
|
||||
|
||||
/*
|
||||
* The entry pointer and capacity counter that live on the stack here
|
||||
* keep track of the current position and the remaining space when tsg
|
||||
* and channel entries are ultimately appended.
|
||||
*/
|
||||
if (f->g->runlist_interleave) {
|
||||
return nvgpu_runlist_append_low(f, runlist,
|
||||
&runlist_entry_base, &max_entries);
|
||||
} else {
|
||||
return nvgpu_runlist_append_flat(f, runlist,
|
||||
&runlist_entry_base, &max_entries);
|
||||
}
|
||||
}
|
||||
|
||||
static bool nvgpu_runlist_modify_active_locked(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
struct nvgpu_channel *ch, bool add)
|
||||
{
|
||||
struct nvgpu_tsg *tsg = NULL;
|
||||
|
||||
tsg = nvgpu_tsg_from_ch(ch);
|
||||
|
||||
if (tsg == NULL) {
|
||||
/*
|
||||
* Unsupported condition, but shouldn't break anything. Warn
|
||||
* and tell the caller that nothing has changed.
|
||||
*/
|
||||
nvgpu_warn(g, "Bare channel in runlist update");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (add) {
|
||||
if (nvgpu_test_and_set_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
/* was already there */
|
||||
return false;
|
||||
} else {
|
||||
/* new, and belongs to a tsg */
|
||||
nvgpu_set_bit(tsg->tsgid, runlist->active_tsgs);
|
||||
tsg->num_active_channels = nvgpu_safe_add_u32(
|
||||
tsg->num_active_channels, 1U);
|
||||
}
|
||||
} else {
|
||||
if (!nvgpu_test_and_clear_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
/* wasn't there */
|
||||
return false;
|
||||
} else {
|
||||
tsg->num_active_channels = nvgpu_safe_sub_u32(
|
||||
tsg->num_active_channels, 1U);
|
||||
if (tsg->num_active_channels == 0U) {
|
||||
/* was the only member of this tsg */
|
||||
nvgpu_clear_bit(tsg->tsgid,
|
||||
runlist->active_tsgs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int nvgpu_runlist_reconstruct_locked(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 buf_id, bool add_entries)
|
||||
{
|
||||
u32 num_entries;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
rl_dbg(g, "[%u] switch to new buffer 0x%16llx",
|
||||
runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id]));
|
||||
|
||||
if (!add_entries) {
|
||||
runlist->count = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id,
|
||||
f->num_runlist_entries);
|
||||
if (num_entries == RUNLIST_APPEND_FAILURE) {
|
||||
return -E2BIG;
|
||||
}
|
||||
runlist->count = num_entries;
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
|
||||
WARN_ON(runlist->count > f->num_runlist_entries);
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch, bool add,
|
||||
bool wait_for_finish)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 buf_id;
|
||||
bool add_entries;
|
||||
|
||||
if (ch != NULL) {
|
||||
bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add);
|
||||
if (!update) {
|
||||
/* no change in runlist contents */
|
||||
return 0;
|
||||
}
|
||||
/* had a channel to update, so reconstruct */
|
||||
add_entries = true;
|
||||
} else {
|
||||
/* no channel; add means update all, !add means clear all */
|
||||
add_entries = add;
|
||||
}
|
||||
|
||||
/* double buffering, swap to next */
|
||||
buf_id = (rl->cur_buffer == 0U) ? 1U : 0U;
|
||||
|
||||
ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id);
|
||||
|
||||
if (wait_for_finish) {
|
||||
ret = g->ops.runlist.wait_pending(g, rl->id);
|
||||
|
||||
if (ret == -ETIMEDOUT) {
|
||||
nvgpu_err(g, "runlist %d update timeout", rl->id);
|
||||
/* trigger runlist update timeout recovery */
|
||||
return ret;
|
||||
|
||||
} else {
|
||||
if (ret == -EINTR) {
|
||||
nvgpu_err(g, "runlist update interrupted");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rl->cur_buffer = buf_id;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
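/*
 * Illustrative note, not part of the original file: the runlist is double
 * buffered. nvgpu_runlist_update_locked() always constructs the new list
 * in the buffer the hardware is not currently using (buf_id is the
 * opposite of cur_buffer), submits it, and only then records it as
 * cur_buffer. When wait_for_finish is false the function returns without
 * polling g->ops.runlist.wait_pending(), so the switch may still be in
 * flight when the caller proceeds.
 */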
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
||||
/* trigger host to expire current timeslice and reschedule runlist from front */
|
||||
int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
|
||||
bool wait_preempt)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
struct nvgpu_runlist *runlist;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
runlist = ch->runlist;
|
||||
if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) {
|
||||
return -EBUSY;
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(
|
||||
g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
|
||||
g->ops.runlist.hw_submit(
|
||||
g, runlist->id, runlist->count, runlist->cur_buffer);
|
||||
|
||||
if (preempt_next) {
|
||||
if (g->ops.runlist.reschedule_preempt_next_locked(ch,
|
||||
wait_preempt) != 0) {
|
||||
nvgpu_err(g, "reschedule preempt next failed");
|
||||
}
|
||||
}
|
||||
|
||||
if (g->ops.runlist.wait_pending(g, runlist->id) != 0) {
|
||||
nvgpu_err(g, "wait pending failed for runlist %u",
|
||||
runlist->id);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nvgpu_mutex_release(&runlist->runlist_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Add/remove a channel from the runlist. Special cases below:
 * runlist->active_channels will NOT be changed.
 * (ch == NULL && !add) means remove all active channels from the runlist.
 * (ch == NULL && add) means restore all active channels on the runlist.
 */
|
||||
static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_mutex_acquire(&rl->runlist_lock);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nvgpu_mutex_release(&rl->runlist_lock);
|
||||
|
||||
if (ret == -ETIMEDOUT) {
|
||||
nvgpu_rc_runlist_update(g, rl->id);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
nvgpu_assert(ch != NULL);
|
||||
|
||||
return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish);
|
||||
}
|
||||
|
||||
int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish);
|
||||
}
|
||||
|
||||
int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int ret = -EINVAL;
|
||||
unsigned long runlist_id = 0;
|
||||
int errcode;
|
||||
unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
|
||||
|
||||
if (g == NULL) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) {
|
||||
/* Capture the last failure error code */
|
||||
errcode = g->ops.runlist.reload(g,
|
||||
f->runlists[runlist_id], add, true);
|
||||
if (errcode != 0) {
|
||||
nvgpu_err(g,
|
||||
"failed to update_runlist %lu %d",
|
||||
runlist_id, errcode);
|
||||
ret = errcode;
|
||||
}
|
||||
}
|
||||
end:
|
||||
return ret;
|
||||
}
|
||||
|
||||
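/*
 * Illustrative usage sketch, not part of the original file: callers of
 * nvgpu_runlist_reload_ids() pass a bitmask of HW runlist ids, so
 * reloading hypothetical runlists 0 and 2 with their active channels
 * restored would look like this.
 */
#if 0 /* example only, never compiled */
static int example_reload_runlists_0_and_2(struct gk20a *g)
{
	u32 runlist_ids = BIT32(0U) | BIT32(2U);

	/* add = true: restore all active channels on each runlist */
	return nvgpu_runlist_reload_ids(g, runlist_ids, true);
}
#endif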
const char *nvgpu_runlist_interleave_level_name(u32 interleave_level)
|
||||
{
|
||||
const char *ret_string = NULL;
|
||||
|
||||
switch (interleave_level) {
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
||||
ret_string = "LOW";
|
||||
break;
|
||||
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
|
||||
ret_string = "MEDIUM";
|
||||
break;
|
||||
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
|
||||
ret_string = "HIGH";
|
||||
break;
|
||||
|
||||
default:
|
||||
ret_string = "?";
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_string;
|
||||
}
|
||||
|
||||
void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask,
|
||||
u32 runlist_state)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
|
||||
runlists_mask, runlist_state);
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
g->ops.runlist.write_state(g, runlists_mask, runlist_state);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void nvgpu_runlist_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 i, j;
|
||||
struct nvgpu_runlist *runlist;
|
||||
|
||||
if ((f->runlists == NULL) || (f->active_runlists == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
g = f->g;
|
||||
|
||||
for (i = 0; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
|
||||
nvgpu_dma_free(g, &runlist->mem[j]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, runlist->active_channels);
|
||||
runlist->active_channels = NULL;
|
||||
|
||||
nvgpu_kfree(g, runlist->active_tsgs);
|
||||
runlist->active_tsgs = NULL;
|
||||
|
||||
nvgpu_mutex_destroy(&runlist->runlist_lock);
|
||||
f->runlists[runlist->id] = NULL;
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, f->active_runlists);
|
||||
f->active_runlists = NULL;
|
||||
f->num_runlists = 0;
|
||||
nvgpu_kfree(g, f->runlists);
|
||||
f->runlists = NULL;
|
||||
f->max_runlists = 0;
|
||||
}
|
||||
|
||||
void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f)
|
||||
{
|
||||
struct nvgpu_runlist *runlist;
|
||||
const struct nvgpu_device *dev;
|
||||
u32 i, j;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (g->is_virtual) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
(void) g->ops.fifo.find_pbdma_for_runlist(g,
|
||||
runlist->id,
|
||||
&runlist->pbdma_bitmask);
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist %d: pbdma bitmask 0x%x",
|
||||
runlist->id, runlist->pbdma_bitmask);
|
||||
|
||||
for (j = 0; j < f->num_engines; j++) {
|
||||
dev = f->active_engines[j];
|
||||
|
||||
if (dev->runlist_id == runlist->id) {
|
||||
runlist->eng_bitmask |= BIT32(dev->engine_id);
|
||||
}
|
||||
}
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist %d: act eng bitmask 0x%x",
|
||||
runlist->id, runlist->eng_bitmask);
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
|
||||
static int nvgpu_init_active_runlist_mapping(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_runlist *runlist;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
unsigned int runlist_id;
|
||||
size_t runlist_size;
|
||||
u32 i, j;
|
||||
int err = 0;
|
||||
|
||||
rl_dbg(g, "Building active runlist map.");
|
||||
|
||||
	/*
	 * In most cases we want to loop through active runlists only. Here
	 * we need to loop through all possible runlists, to build the mapping
	 * between runlists[runlist_id] and active_runlists[i].
	 */
|
||||
i = 0U;
|
||||
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
|
||||
if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
|
||||
/* skip inactive runlist */
|
||||
rl_dbg(g, " Skipping invalid runlist: %d", runlist_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
rl_dbg(g, " Configuring HW runlist: %u", runlist_id);
|
||||
rl_dbg(g, " SW runlist index to HW: %u -> %u", i, runlist_id);
|
||||
|
||||
runlist = &f->active_runlists[i];
|
||||
runlist->id = runlist_id;
|
||||
f->runlists[runlist_id] = runlist;
|
||||
i = nvgpu_safe_add_u32(i, 1U);
|
||||
|
||||
runlist->active_channels =
|
||||
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
|
||||
BITS_PER_BYTE));
|
||||
if (runlist->active_channels == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
runlist->active_tsgs =
|
||||
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
|
||||
BITS_PER_BYTE));
|
||||
if (runlist->active_tsgs == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
runlist_size = (size_t)f->runlist_entry_size *
|
||||
(size_t)f->num_runlist_entries;
|
||||
rl_dbg(g, " RL entries: %d", f->num_runlist_entries);
|
||||
rl_dbg(g, " RL size %zu", runlist_size);
|
||||
|
||||
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
|
||||
err = nvgpu_dma_alloc_flags_sys(g,
|
||||
g->is_virtual ?
|
||||
0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED,
|
||||
runlist_size,
|
||||
&runlist->mem[j]);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "memory allocation failed");
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&runlist->runlist_lock);
|
||||
|
||||
		/*
		 * None of the buffers is pinned if this value doesn't change.
		 * Otherwise, one of them (cur_buffer) must have been pinned.
		 */
|
||||
runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up_runlist:
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 num_runlists = 0U;
|
||||
unsigned int runlist_id;
|
||||
int err = 0;
|
||||
|
||||
rl_dbg(g, "Initializing Runlists");
|
||||
|
||||
nvgpu_spinlock_init(&f->runlist_submit_lock);
|
||||
|
||||
f->runlist_entry_size = g->ops.runlist.entry_size(g);
|
||||
f->num_runlist_entries = g->ops.runlist.length_max(g);
|
||||
f->max_runlists = g->ops.runlist.count_max(g);
|
||||
|
||||
f->runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
|
||||
sizeof(*f->runlists), f->max_runlists));
|
||||
if (f->runlists == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
|
||||
if (nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
|
||||
num_runlists = nvgpu_safe_add_u32(num_runlists, 1U);
|
||||
}
|
||||
}
|
||||
f->num_runlists = num_runlists;
|
||||
|
||||
f->active_runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
|
||||
sizeof(*f->active_runlists), num_runlists));
|
||||
if (f->active_runlists == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
|
||||
rl_dbg(g, " Max runlists: %u", f->max_runlists);
|
||||
rl_dbg(g, " Active runlists: %u", f->num_runlists);
|
||||
rl_dbg(g, " RL entry size: %u bytes", f->runlist_entry_size);
|
||||
rl_dbg(g, " Max RL entries: %u", f->num_runlist_entries);
|
||||
|
||||
err = nvgpu_init_active_runlist_mapping(g);
|
||||
if (err != 0) {
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
g->ops.runlist.init_enginfo(g, f);
|
||||
return 0;
|
||||
|
||||
clean_up_runlist:
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
rl_dbg(g, "fail");
|
||||
return err;
|
||||
}
|
||||
|
||||
u32 nvgpu_runlist_get_runlists_mask(struct gk20a *g, u32 id,
|
||||
unsigned int id_type, u32 act_eng_bitmask, u32 pbdma_bitmask)
|
||||
{
|
||||
u32 i, runlists_mask = 0;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
|
||||
bool bitmask_disabled = ((act_eng_bitmask == 0U) &&
|
||||
(pbdma_bitmask == 0U));
|
||||
|
||||
/* engine and/or pbdma ids are known */
|
||||
if (!bitmask_disabled) {
|
||||
for (i = 0U; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
if ((runlist->eng_bitmask & act_eng_bitmask) != 0U) {
|
||||
runlists_mask |= BIT32(runlist->id);
|
||||
}
|
||||
|
||||
if ((runlist->pbdma_bitmask & pbdma_bitmask) != 0U) {
|
||||
runlists_mask |= BIT32(runlist->id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (id_type != ID_TYPE_UNKNOWN) {
|
||||
if (id_type == ID_TYPE_TSG) {
|
||||
runlist = f->tsg[id].runlist;
|
||||
} else {
|
||||
runlist = f->channel[id].runlist;
|
||||
}
|
||||
|
||||
if (runlist == NULL) {
|
||||
/* Warning on Linux, real assert on QNX. */
|
||||
nvgpu_assert(runlist != NULL);
|
||||
} else {
|
||||
runlists_mask |= BIT32(runlist->id);
|
||||
}
|
||||
} else {
|
||||
if (bitmask_disabled) {
|
||||
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
|
||||
"and pbdma ids are unknown");
|
||||
|
||||
for (i = 0U; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
runlists_mask |= BIT32(runlist->id);
|
||||
}
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
|
||||
"and/or pbdma ids are known");
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask);
|
||||
return runlists_mask;
|
||||
}
|
||||
|
||||
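/*
 * Illustrative usage sketch, not part of the original file: a caller such
 * as the recovery path might ask for the runlists serving a faulted TSG;
 * passing zero engine and PBDMA bitmasks leaves the id lookup as the only
 * source of the mask.
 */
#if 0 /* example only, never compiled */
static u32 example_runlists_for_tsg(struct gk20a *g, u32 tsgid)
{
	return nvgpu_runlist_get_runlists_mask(g, tsgid, ID_TYPE_TSG, 0U, 0U);
}
#endif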
void nvgpu_runlist_unlock_runlists(struct gk20a *g, u32 runlists_mask)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
u32 i;
|
||||
|
||||
nvgpu_log_info(g, "release runlist_lock for runlists set in "
|
||||
"runlists_mask: 0x%08x", runlists_mask);
|
||||
|
||||
for (i = 0U; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
if ((BIT32(i) & runlists_mask) != 0U) {
|
||||
nvgpu_mutex_release(&runlist->runlist_lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
837
drivers/gpu/nvgpu/common/fifo/submit.c
Normal file
@@ -0,0 +1,837 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/ltc.h>
|
||||
#include <nvgpu/os_sched.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/channel_sync.h>
|
||||
#include <nvgpu/channel_sync_syncpt.h>
|
||||
#include <nvgpu/watchdog.h>
|
||||
#include <nvgpu/job.h>
|
||||
#include <nvgpu/priv_cmdbuf.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/fence.h>
|
||||
#include <nvgpu/swprofile.h>
|
||||
#include <nvgpu/vpr.h>
|
||||
#include <nvgpu/trace.h>
|
||||
#include <nvgpu/nvhost.h>
|
||||
#include <nvgpu/user_fence.h>
|
||||
|
||||
#include <nvgpu/fifo/swprofile.h>
|
||||
|
||||
/*
|
||||
* We might need two extra gpfifo entries per submit - one for pre fence and
|
||||
* one for post fence.
|
||||
*/
|
||||
#define EXTRA_GPFIFO_ENTRIES 2U
|
||||
|
||||
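/*
 * Illustrative note, not part of the original file: a submit of
 * num_entries user entries may therefore need up to num_entries + 2 gpfifo
 * slots, which is what check_gpfifo_capacity() verifies later in this file
 * before anything is written to the ring.
 */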
static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_fence *fence,
|
||||
struct priv_cmd_entry **wait_cmd, bool flag_sync_fence)
|
||||
{
|
||||
/*
|
||||
* A single input sync fd may contain multiple fences. The preallocated
|
||||
* priv cmdbuf space allows exactly one per submit in the worst case.
|
||||
* Require at most one wait for consistent deterministic submits; if
|
||||
* there are more and no space, we'll -EAGAIN in nondeterministic mode.
|
||||
*/
|
||||
u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
|
||||
1U : 0U;
|
||||
int err;
|
||||
|
||||
if (flag_sync_fence) {
|
||||
nvgpu_assert(fence->id <= (u32)INT_MAX);
|
||||
err = nvgpu_channel_sync_wait_fence_fd(c->sync,
|
||||
(int)fence->id, wait_cmd, max_wait_cmds);
|
||||
} else {
|
||||
struct nvgpu_channel_sync_syncpt *sync_syncpt;
|
||||
|
||||
sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
|
||||
if (sync_syncpt != NULL) {
|
||||
err = nvgpu_channel_sync_wait_syncpt(sync_syncpt,
|
||||
fence->id, fence->value, wait_cmd);
|
||||
} else {
|
||||
err = -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
|
||||
struct priv_cmd_entry **incr_cmd,
|
||||
struct nvgpu_fence_type *post_fence, bool flag_fence_get,
|
||||
bool need_wfi, bool need_sync_fence)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (flag_fence_get) {
|
||||
err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
|
||||
post_fence, need_wfi, need_sync_fence);
|
||||
} else {
|
||||
err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
|
||||
post_fence, need_sync_fence);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the submit synchronization - pre-fences and post-fences.
|
||||
*/
|
||||
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_fence *fence,
|
||||
struct nvgpu_channel_job *job,
|
||||
u32 flags)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
bool need_sync_fence;
|
||||
bool new_sync_created = false;
|
||||
int err = 0;
|
||||
bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
|
||||
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
|
||||
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
|
||||
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
|
||||
|
||||
if (g->aggressive_sync_destroy_thresh != 0U) {
|
||||
nvgpu_mutex_acquire(&c->sync_lock);
|
||||
if (c->sync == NULL) {
|
||||
c->sync = nvgpu_channel_sync_create(c);
|
||||
if (c->sync == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_unlock;
|
||||
}
|
||||
new_sync_created = true;
|
||||
}
|
||||
nvgpu_channel_sync_get_ref(c->sync);
|
||||
}
|
||||
|
||||
if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
|
||||
err = g->ops.channel.set_syncpt(c);
|
||||
if (err != 0) {
|
||||
goto clean_up_put_sync;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Optionally insert syncpt/semaphore wait in the beginning of gpfifo
|
||||
* submission when user requested.
|
||||
*/
|
||||
if (flag_fence_wait) {
|
||||
err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd,
|
||||
flag_sync_fence);
|
||||
if (err != 0) {
|
||||
goto clean_up_put_sync;
|
||||
}
|
||||
}
|
||||
|
||||
need_sync_fence = flag_fence_get && flag_sync_fence;
|
||||
|
||||
/*
|
||||
* Always generate an increment at the end of a GPFIFO submission. When
|
||||
* we do job tracking, post fences are needed for various reasons even
|
||||
* if not requested by user.
|
||||
*/
|
||||
err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence,
|
||||
flag_fence_get, need_wfi, need_sync_fence);
|
||||
if (err != 0) {
|
||||
goto clean_up_wait_cmd;
|
||||
}
|
||||
|
||||
if (g->aggressive_sync_destroy_thresh != 0U) {
|
||||
nvgpu_mutex_release(&c->sync_lock);
|
||||
}
|
||||
return 0;
|
||||
|
||||
clean_up_wait_cmd:
|
||||
if (job->wait_cmd != NULL) {
|
||||
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
|
||||
}
|
||||
job->wait_cmd = NULL;
|
||||
clean_up_put_sync:
|
||||
if (g->aggressive_sync_destroy_thresh != 0U) {
|
||||
if (nvgpu_channel_sync_put_ref_and_check(c->sync)
|
||||
&& g->aggressive_sync_destroy) {
|
||||
nvgpu_channel_sync_destroy(c->sync);
|
||||
}
|
||||
}
|
||||
clean_up_unlock:
|
||||
if (g->aggressive_sync_destroy_thresh != 0U) {
|
||||
nvgpu_mutex_release(&c->sync_lock);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c,
|
||||
struct priv_cmd_entry *cmd)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
|
||||
struct nvgpu_gpfifo_entry gpfifo_entry;
|
||||
u64 gva;
|
||||
u32 size;
|
||||
|
||||
nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size);
|
||||
g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size);
|
||||
|
||||
nvgpu_mem_wr_n(g, gpfifo_mem,
|
||||
c->gpfifo.put * (u32)sizeof(gpfifo_entry),
|
||||
&gpfifo_entry, (u32)sizeof(gpfifo_entry));
|
||||
|
||||
c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U);
|
||||
}
|
||||
|
||||
static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_userdata userdata,
|
||||
u32 num_entries)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
|
||||
u32 gpfifo_size = c->gpfifo.entry_num;
|
||||
u32 len = num_entries;
|
||||
u32 start = c->gpfifo.put;
|
||||
u32 end = start + len; /* exclusive */
|
||||
int err;
|
||||
|
||||
nvgpu_speculation_barrier();
|
||||
if (end > gpfifo_size) {
|
||||
/* wrap-around */
|
||||
u32 length0 = gpfifo_size - start;
|
||||
u32 length1 = len - length0;
|
||||
|
||||
err = g->os_channel.copy_user_gpfifo(
|
||||
&gpfifo_cpu[start], userdata,
|
||||
0, length0);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = g->os_channel.copy_user_gpfifo(
|
||||
gpfifo_cpu, userdata,
|
||||
length0, length1);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
} else {
|
||||
err = g->os_channel.copy_user_gpfifo(
|
||||
&gpfifo_cpu[start], userdata,
|
||||
0, len);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *src, u32 num_entries)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
|
||||
/* in bytes */
|
||||
u32 gpfifo_size =
|
||||
c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry);
|
||||
u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry);
|
||||
u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry);
|
||||
u32 end = start + len; /* exclusive */
|
||||
|
||||
if (end > gpfifo_size) {
|
||||
/* wrap-around */
|
||||
u32 length0 = gpfifo_size - start;
|
||||
u32 length1 = len - length0;
|
||||
struct nvgpu_gpfifo_entry *src2 = &src[length0];
|
||||
|
||||
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
|
||||
nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
|
||||
} else {
|
||||
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
|
||||
}
|
||||
}
|
||||
|
||||
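/*
 * Illustrative note, not part of the original file: the append helpers
 * above split any copy that would run past the end of the ring. For
 * example, with entry_num = 512, put = 510 and num_entries = 5, length0
 * covers the two slots 510..511 and length1 the three slots 0..2, after
 * which put wraps around to 3.
 */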
/*
|
||||
* Copy source gpfifo entries into the gpfifo ring buffer, potentially
|
||||
* splitting into two memcpys to handle wrap-around.
|
||||
*/
|
||||
static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *kern_gpfifo,
|
||||
struct nvgpu_gpfifo_userdata userdata,
|
||||
u32 num_entries)
|
||||
{
|
||||
int err;
|
||||
|
||||
if ((kern_gpfifo == NULL)
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
&& (c->gpfifo.pipe == NULL)
|
||||
#endif
|
||||
) {
|
||||
/*
|
||||
* This path (from userspace to sysmem) is special in order to
|
||||
* avoid two copies unnecessarily (from user to pipe, then from
|
||||
* pipe to gpu sysmem buffer).
|
||||
*/
|
||||
err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
|
||||
num_entries);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
else if (kern_gpfifo == NULL) {
|
||||
/* from userspace to vidmem, use the common path */
|
||||
err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe,
|
||||
userdata, 0, num_entries);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
|
||||
num_entries);
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
/* from kernel to either sysmem or vidmem, don't need
|
||||
* copy_user_gpfifo so use the common path */
|
||||
nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
|
||||
}
|
||||
|
||||
trace_write_pushbuffers(c, num_entries);
|
||||
|
||||
c->gpfifo.put = (c->gpfifo.put + num_entries) &
|
||||
(c->gpfifo.entry_num - 1U);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *gpfifo,
|
||||
struct nvgpu_gpfifo_userdata userdata,
|
||||
u32 num_entries,
|
||||
u32 flags,
|
||||
struct nvgpu_channel_fence *fence,
|
||||
struct nvgpu_fence_type **fence_out,
|
||||
struct nvgpu_swprofiler *profiler,
|
||||
bool need_deferred_cleanup)
|
||||
{
|
||||
bool skip_buffer_refcounting = (flags &
|
||||
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
|
||||
struct nvgpu_channel_job *job = NULL;
|
||||
int err;
|
||||
|
||||
nvgpu_channel_joblist_lock(c);
|
||||
err = nvgpu_channel_alloc_job(c, &job);
|
||||
nvgpu_channel_joblist_unlock(c);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
|
||||
if (err != 0) {
|
||||
goto clean_up_job;
|
||||
}
|
||||
|
||||
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
|
||||
|
||||
/*
|
||||
* wait_cmd can be unset even if flag_fence_wait exists; the
|
||||
* android sync framework for example can provide entirely
|
||||
* empty fences that act like trivially expired waits.
|
||||
*/
|
||||
if (job->wait_cmd != NULL) {
|
||||
nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd);
|
||||
}
|
||||
|
||||
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
|
||||
if (err != 0) {
|
||||
goto clean_up_gpfifo_wait;
|
||||
}
|
||||
|
||||
nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);
|
||||
|
||||
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
|
||||
if (err != 0) {
|
||||
goto clean_up_gpfifo_incr;
|
||||
}
|
||||
|
||||
nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);
|
||||
|
||||
if (fence_out != NULL) {
|
||||
/* This fence ref is going somewhere else but it's owned by the
|
||||
* job; the caller is expected to release it promptly, so that
|
||||
* a subsequent job cannot reclaim its memory.
|
||||
*/
|
||||
*fence_out = nvgpu_fence_get(&job->post_fence);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up_gpfifo_incr:
|
||||
/*
|
||||
* undo the incr priv cmdbuf and the user entries:
|
||||
* new gp.put =
|
||||
* (gp.put - (1 + num_entries)) & (gp.entry_num - 1) =
|
||||
* (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1)
|
||||
* the + entry_num does not affect the result but avoids wrapping below
|
||||
* zero for MISRA, although it would be well defined.
|
||||
*/
|
||||
c->gpfifo.put =
|
||||
(nvgpu_safe_add_u32(c->gpfifo.put,
|
||||
nvgpu_safe_sub_u32(c->gpfifo.entry_num,
|
||||
nvgpu_safe_add_u32(1U, num_entries)))) &
|
||||
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
|
||||
clean_up_gpfifo_wait:
|
||||
if (job->wait_cmd != NULL) {
|
||||
/*
|
||||
* undo the wait priv cmdbuf entry:
|
||||
* gp.put =
|
||||
* (gp.put - 1) & (gp.entry_num - 1) =
|
||||
* (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1)
|
||||
* same as above with the gp.entry_num on the left side.
|
||||
*/
|
||||
c->gpfifo.put =
|
||||
nvgpu_safe_add_u32(c->gpfifo.put,
|
||||
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
|
||||
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
|
||||
}
|
||||
nvgpu_fence_put(&job->post_fence);
|
||||
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
|
||||
if (job->wait_cmd != NULL) {
|
||||
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
|
||||
}
|
||||
clean_up_job:
|
||||
nvgpu_channel_free_job(c, job);
|
||||
return err;
|
||||
}
|
||||
|
||||
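/*
 * Illustrative note, not part of the original file: both rollback
 * expressions in the error path above are modular subtraction written as
 * an addition so the intermediate value cannot go below zero. For example,
 * with gp.entry_num = 1024, gp.put = 2 and num_entries = 5, undoing the
 * incr cmdbuf plus the user entries gives (2 + (1024 - 6)) & 1023 = 1020,
 * i.e. put moves back by the 6 slots those entries consumed.
 */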
static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *gpfifo,
|
||||
struct nvgpu_gpfifo_userdata userdata,
|
||||
u32 num_entries,
|
||||
struct nvgpu_fence_type **fence_out,
|
||||
struct nvgpu_swprofiler *profiler)
|
||||
{
|
||||
int err;
|
||||
|
||||
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
|
||||
|
||||
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
|
||||
num_entries);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
if (fence_out != NULL) {
|
||||
*fence_out = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
{
	/*
	 * Make sure we have enough space for gpfifo entries. Check cached
	 * values first and then read from HW. If there is no space, return
	 * -EAGAIN and let userspace decide whether to retry the request.
	 */
	if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
		if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
				required) {
			return -EAGAIN;
		}
	}

	return 0;
}

static int nvgpu_do_submit(struct nvgpu_channel *c,
|
||||
struct nvgpu_gpfifo_entry *gpfifo,
|
||||
struct nvgpu_gpfifo_userdata userdata,
|
||||
u32 num_entries,
|
||||
u32 flags,
|
||||
struct nvgpu_channel_fence *fence,
|
||||
struct nvgpu_fence_type **fence_out,
|
||||
struct nvgpu_swprofiler *profiler,
|
||||
bool need_job_tracking,
|
||||
bool need_deferred_cleanup)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
int err;
|
||||
|
||||
#ifdef CONFIG_NVGPU_TRACE
|
||||
trace_gk20a_channel_submit_gpfifo(g->name,
|
||||
c->chid,
|
||||
num_entries,
|
||||
flags,
|
||||
fence ? fence->id : 0,
|
||||
fence ? fence->value : 0);
|
||||
#endif
|
||||
|
||||
nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
|
||||
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
||||
|
||||
err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
if (need_job_tracking) {
|
||||
err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
|
||||
userdata, num_entries, flags, fence,
|
||||
fence_out, profiler, need_deferred_cleanup);
|
||||
} else {
|
||||
err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
|
||||
userdata, num_entries, fence_out, profiler);
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);
|
||||
|
||||
g->ops.userd.gp_put(g, c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(nvgpu_channel_is_deterministic(c));

	/* sync framework on post fences would not be deterministic */
	if (flag_fence_get && flag_sync_fence) {
		return -EINVAL;
	}

	/* this would be O(n) */
	if (!skip_buffer_refcounting) {
		return -EINVAL;
	}

	/* the watchdog needs periodic job cleanup */
	if (nvgpu_channel_wdt_enabled(c->wdt)) {
		return -EINVAL;
	}

	/*
	 * Job tracking is necessary on deterministic channels if and only if
	 * pre- or post-fence functionality is needed. If not, a fast submit
	 * can be done (ie. only need to write out userspace GPFIFO entries and
	 * update GP_PUT).
	 */
	need_job_tracking = flag_fence_wait || flag_fence_get;

	if (need_job_tracking) {
		/* nvgpu_semaphore is dynamically allocated, not pooled */
		if (!nvgpu_has_syncpoints(g)) {
			return -EINVAL;
		}

		/* dynamic sync allocation wouldn't be deterministic */
		if (g->aggressive_sync_destroy_thresh != 0U) {
			return -EINVAL;
		}

		/*
		 * (Try to) clean up a single job, if available. Each job
		 * requires the same amount of metadata, so this is enough for
		 * the job list, fence pool, and private command buffers that
		 * this submit will need.
		 *
		 * This submit might still need more gpfifo space than the
		 * previous one used. The job metadata doesn't track that,
		 * though - the HW GP_GET pointer can be much further away than
		 * our metadata pointers; gpfifo space is "freed" by the HW.
		 */
		nvgpu_channel_clean_up_deterministic_job(c);
	}

	/* Grab access to HW to deal with do_idle */
	nvgpu_rwsem_down_read(&g->deterministic_busy);

	if (c->deterministic_railgate_allowed) {
		/*
		 * Nope - this channel has dropped its own power ref. As
		 * deterministic submits don't hold a power ref per submitted
		 * job like normal ones do, the GPU might railgate any time now
		 * and thus submit is disallowed.
		 */
		err = -EINVAL;
		goto clean_up;
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profiler, need_job_tracking, false);
	if (err != 0) {
		goto clean_up;
	}

	/* No hw access beyond this point */
	nvgpu_rwsem_up_read(&g->deterministic_busy);

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	nvgpu_rwsem_up_read(&g->deterministic_busy);

	return err;
}
#endif

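/*
 * Worked example of the deterministic fast path above (illustrative only):
 * with the channel watchdog disabled, a submit whose flags contain only
 * NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING needs no job tracking, so it
 * just copies the GPFIFO entries and bumps GP_PUT under the
 * deterministic_busy read lock. Adding NVGPU_SUBMIT_FLAGS_FENCE_GET (without
 * SYNC_FENCE, and only on a syncpoint-backed channel) switches it to the
 * tracked path; adding SYNC_FENCE as well makes the submit fail with -EINVAL.
 */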
static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	bool skip_buffer_refcounting = (flags &
			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(!nvgpu_channel_is_deterministic(c));

	/*
	 * Job tracking is necessary for any of the following conditions on
	 * non-deterministic channels:
	 * - pre- or post-fence functionality
	 * - GPU rail-gating
	 * - VPR resize enabled
	 * - buffer refcounting
	 * - channel watchdog
	 *
	 * If none of the conditions are met, then job tracking is not
	 * required and a fast submit can be done (ie. only need to write
	 * out userspace GPFIFO entries and update GP_PUT).
	 */
	need_job_tracking = flag_fence_wait ||
			flag_fence_get ||
			nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
			nvgpu_is_vpr_resize_enabled() ||
			!skip_buffer_refcounting ||
			nvgpu_channel_wdt_enabled(c->wdt);

	if (need_job_tracking) {
		/*
		 * Get a power ref because this isn't a deterministic
		 * channel that holds them during the channel lifetime.
		 * This one is released by nvgpu_channel_clean_up_jobs,
		 * via syncpt or sema interrupt, whichever is used.
		 */
		err = gk20a_busy(g);
		if (err != 0) {
			nvgpu_err(g,
				"failed to power on gk20a to submit gpfifo");
			nvgpu_print_current(g, NULL, NVGPU_ERROR);
			return err;
		}
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profiler, need_job_tracking, true);
	if (err != 0) {
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	gk20a_idle(g);

	return err;
}

static int check_submit_allowed(struct nvgpu_channel *c)
{
	struct gk20a *g = c->g;

	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
		return -ENODEV;
	}

	if (nvgpu_channel_check_unserviceable(c)) {
		return -ETIMEDOUT;
	}

	if (c->usermode_submit_enabled) {
		return -EINVAL;
	}

	if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) {
		return -ENOMEM;
	}

	/* an address space needs to have been bound at this point. */
	if (!nvgpu_channel_as_bound(c)) {
		nvgpu_err(g,
			"not bound to an address space at time of gpfifo"
			" submission.");
		return -EINVAL;
	}

	return 0;
}

static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_fence_type **fence_out,
				struct nvgpu_swprofiler *profiler)
{
	struct gk20a *g = c->g;
	int err;

	err = check_submit_allowed(c);
	if (err != 0) {
		return err;
	}

	/*
	 * Fifo not large enough for request. Return error immediately.
	 * Kernel can insert gpfifo entries before and after user gpfifos.
	 * So, add extra entries in user request. Also, HW with fifo size N
	 * can accept only N-1 entries.
	 */
	if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
		nvgpu_err(g, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);

	/* update debug settings */
	nvgpu_ltc_sync_enabled(g);

	nvgpu_log_info(g, "channel %d", c->chid);

#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
	if (c->deterministic) {
		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out, profiler);
	} else
#endif
	{
		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out, profiler);
	}

	if (err != 0) {
		return err;
	}

#ifdef CONFIG_NVGPU_TRACE
	if (fence_out != NULL && *fence_out != NULL) {
		/*
		 * This is not a good example of how to use the fence type.
		 * Don't touch the priv data. The debug trace is special.
		 */
#ifdef CONFIG_TEGRA_GK20A_NVHOST
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				(*fence_out)->priv.syncpt_id,
				(*fence_out)->priv.syncpt_value);
#else
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				0, 0);
#endif
	} else {
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				0, 0);
	}
#endif

	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);

	nvgpu_log_fn(g, "done");
	return err;
}

int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_userdata userdata,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_user_fence *fence_out,
				struct nvgpu_swprofiler *profiler)
{
	struct nvgpu_fence_type *fence_internal = NULL;
	int err;

	err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
			flags, fence, &fence_internal, profiler);
	if (err == 0 && fence_internal != NULL) {
		*fence_out = nvgpu_fence_extract_user(fence_internal);
		nvgpu_fence_put(fence_internal);
	}
	return err;
}

int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
				struct nvgpu_gpfifo_entry *gpfifo,
				u32 num_entries,
				u32 flags,
				struct nvgpu_channel_fence *fence,
				struct nvgpu_fence_type **fence_out)
{
	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };

	return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
			flags, fence, fence_out, NULL);
}
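/*
 * Illustrative usage sketch (hypothetical, not part of this file): a
 * kernel-mode caller with a bound, non-usermode channel could submit a
 * small batch and request a post-fence roughly as below. The entry
 * contents, channel setup, and fence-wait step are assumptions made only
 * for the example.
 */
#if 0
static int example_kernel_submit(struct nvgpu_channel *ch,
				 struct nvgpu_gpfifo_entry *entries,
				 u32 num_entries)
{
	struct nvgpu_fence_type *post_fence = NULL;
	int err;

	/* No pre-fence to wait on; ask for a post-fence we can track. */
	err = nvgpu_submit_channel_gpfifo_kernel(ch, entries, num_entries,
			NVGPU_SUBMIT_FLAGS_FENCE_GET, NULL, &post_fence);
	if (err != 0) {
		return err;
	}

	/* ... wait on post_fence and drop the reference when done ... */
	return 0;
}
#endif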
1120
drivers/gpu/nvgpu/common/fifo/tsg.c
Normal file
File diff suppressed because it is too large
157
drivers/gpu/nvgpu/common/fifo/userd.c
Normal file
@@ -0,0 +1,157 @@
/*
 * USERD
 *
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/trace.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/fifo/userd.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>

int nvgpu_userd_init_slabs(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	int err;

	nvgpu_mutex_init(&f->userd_mutex);

	f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g);
	f->num_userd_slabs =
		DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab);

	f->userd_slabs = nvgpu_big_zalloc(g, f->num_userd_slabs *
					sizeof(struct nvgpu_mem));
	if (f->userd_slabs == NULL) {
		nvgpu_err(g, "could not allocate userd slabs");
		err = -ENOMEM;
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_mutex_destroy(&f->userd_mutex);

	return err;
}

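/*
 * Worked example of the slab sizing above (illustrative only; the USERD
 * entry size is chip-specific and queried through g->ops.userd.entry_size).
 * Assuming a 4 KiB CPU page and a 512-byte entry:
 *
 *   num_channels_per_slab = 4096 / 512 = 8
 *   num_userd_slabs       = DIV_ROUND_UP(512 channels, 8) = 64 slabs
 *
 * Each slab is one sysmem page, allocated lazily in
 * nvgpu_userd_init_channel() the first time a channel in that slab is used.
 */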
void nvgpu_userd_free_slabs(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 slab;

	for (slab = 0; slab < f->num_userd_slabs; slab++) {
		nvgpu_dma_free(g, &f->userd_slabs[slab]);
	}
	nvgpu_big_free(g, f->userd_slabs);
	f->userd_slabs = NULL;

	nvgpu_mutex_destroy(&f->userd_mutex);
}

int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
{
	struct nvgpu_fifo *f = &g->fifo;
	struct nvgpu_mem *mem;
	u32 slab = c->chid / f->num_channels_per_slab;
	int err = 0;

	if (slab >= f->num_userd_slabs) {
		nvgpu_err(g, "chid %u, slab %u out of range (max=%u)",
			c->chid, slab, f->num_userd_slabs);
		return -EINVAL;
	}

	mem = &g->fifo.userd_slabs[slab];

	nvgpu_mutex_acquire(&f->userd_mutex);
	if (!nvgpu_mem_is_valid(mem)) {
		err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
		if (err != 0) {
			nvgpu_err(g, "userd allocation failed, err=%d", err);
			goto done;
		}

		if (g->ops.mm.is_bar1_supported(g)) {
			mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem,
						slab * NVGPU_CPU_PAGE_SIZE);
		}
	}
	c->userd_mem = mem;
	c->userd_offset = (c->chid % f->num_channels_per_slab) *
				g->ops.userd.entry_size(g);
	c->userd_iova = nvgpu_channel_userd_addr(c);

	nvgpu_log(g, gpu_dbg_info,
		"chid=%u slab=%u mem=%p offset=%u addr=%llx gpu_va=%llx",
		c->chid, slab, mem, c->userd_offset,
		nvgpu_channel_userd_addr(c),
		nvgpu_channel_userd_gpu_va(c));

done:
	nvgpu_mutex_release(&f->userd_mutex);
	return err;
}

int nvgpu_userd_setup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	int err;
	u32 size, num_pages;

	err = nvgpu_userd_init_slabs(g);
	if (err != 0) {
		nvgpu_err(g, "failed to init userd support");
		return err;
	}

	size = f->num_channels * g->ops.userd.entry_size(g);
	num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);
	err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
			num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0);
	if (err != 0) {
		nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_userd_free_slabs(g);

	return err;
}

void nvgpu_userd_cleanup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;

	if (f->userd_gpu_va != 0ULL) {
		(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
		f->userd_gpu_va = 0ULL;
	}

	nvgpu_userd_free_slabs(g);
}
278
drivers/gpu/nvgpu/common/fifo/watchdog.c
Normal file
@@ -0,0 +1,278 @@
/*
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/string.h>

struct nvgpu_channel_wdt {
	struct gk20a *g;

	/* lock protects the running timer state */
	struct nvgpu_spinlock lock;
	struct nvgpu_timeout timer;
	bool running;
	struct nvgpu_channel_wdt_state ch_state;

	/* lock not needed */
	u32 limit_ms;
	bool enabled;
};

struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g)
{
	struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt));

	if (wdt == NULL) {
		return NULL;
	}

	wdt->g = g;
	nvgpu_spinlock_init(&wdt->lock);
	wdt->enabled = true;
	wdt->limit_ms = g->ch_wdt_init_limit_ms;

	return wdt;
}

void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt)
{
	nvgpu_kfree(wdt->g, wdt);
}

void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt)
{
	wdt->enabled = true;
}

void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt)
{
	wdt->enabled = false;
}

bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt)
{
	return wdt->enabled;
}

void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms)
{
	wdt->limit_ms = limit_ms;
}

u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
{
	return wdt->limit_ms;
}

static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	struct gk20a *g = wdt->g;
	int ret;

	ret = nvgpu_timeout_init(g, &wdt->timer,
			   wdt->limit_ms,
			   NVGPU_TIMER_CPU_TIMER);
	if (ret != 0) {
		nvgpu_err(g, "timeout_init failed: %d", ret);
		return;
	}

	wdt->ch_state = *state;
	wdt->running = true;
}

/**
 * Start a timeout counter (watchdog) on this channel.
 *
 * Trigger a watchdog to recover the channel after the per-platform timeout
 * duration (but strictly no earlier) if the channel hasn't advanced within
 * that time.
 *
 * If the timeout is already running, do nothing. This should be called when
 * new jobs are submitted. The timeout will stop when the last tracked job
 * finishes, making the channel idle.
 */
void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	if (!nvgpu_is_timeouts_enabled(wdt->g)) {
		return;
	}

	if (!wdt->enabled) {
		return;
	}

	nvgpu_spinlock_acquire(&wdt->lock);

	if (wdt->running) {
		nvgpu_spinlock_release(&wdt->lock);
		return;
	}
	nvgpu_channel_wdt_init(wdt, state);
	nvgpu_spinlock_release(&wdt->lock);
}
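/*
 * Typical lifecycle (summarized from the functions in this file; the exact
 * call sites are assumptions for illustration): the submit path calls
 * nvgpu_channel_wdt_start() when a job lands on an idle channel, the job
 * cleanup path calls nvgpu_channel_wdt_stop() when the channel goes idle
 * (or nvgpu_channel_wdt_continue() if more work remains), and a periodic
 * worker calls nvgpu_channel_wdt_check() with a fresh gp_get/pb_get snapshot
 * to decide whether the channel is stuck and needs recovery.
 */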
/**
 * Stop a running timeout counter (watchdog) on this channel.
 *
 * Make the watchdog consider the channel not running, so that it won't get
 * recovered even if no progress is detected. Progress is not tracked if the
 * watchdog is turned off.
 *
 * No guarantees are made about concurrent execution of the timeout handler.
 * (This should be called from an update handler running in the same thread
 * as the watchdog.)
 */
bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
{
	bool was_running;

	nvgpu_spinlock_acquire(&wdt->lock);
	was_running = wdt->running;
	wdt->running = false;
	nvgpu_spinlock_release(&wdt->lock);
	return was_running;
}

/**
 * Continue a previously stopped timeout.
 *
 * Enable the timeout again but don't reinitialize its timer.
 *
 * No guarantees are made about concurrent execution of the timeout handler.
 * (This should be called from an update handler running in the same thread
 * as the watchdog.)
 */
void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt)
{
	nvgpu_spinlock_acquire(&wdt->lock);
	wdt->running = true;
	nvgpu_spinlock_release(&wdt->lock);
}

/**
 * Reset the counter of a timeout that is in effect.
 *
 * If this channel has an active timeout, act as if something happened on the
 * channel right now.
 *
 * Rewinding a stopped counter is irrelevant; this is a no-op for non-running
 * timeouts. Stopped timeouts can only be started (which is technically a
 * rewind too) or continued (where the stop was effectively a pause).
 */
void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	nvgpu_spinlock_acquire(&wdt->lock);
	if (wdt->running) {
		nvgpu_channel_wdt_init(wdt, state);
	}
	nvgpu_spinlock_release(&wdt->lock);
}

/**
 * Check if the watchdog is running.
 *
 * A running watchdog means one that is requested to run and expire in the
 * future. The state of a running watchdog has to be checked periodically to
 * see if it's expired.
 */
bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt)
{
	bool running;

	nvgpu_spinlock_acquire(&wdt->lock);
	running = wdt->running;
	nvgpu_spinlock_release(&wdt->lock);

	return running;
}

/**
 * Check if a channel has been stuck for the watchdog limit.
 *
 * Test if this channel has really got stuck at this point by checking if its
 * {gp,pb}_get have advanced or not. If progress was detected, start the timer
 * from zero again. If no {gp,pb}_get action happened in the watchdog time
 * limit, return true. Else return false.
 */
static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	struct gk20a *g = wdt->g;
	struct nvgpu_channel_wdt_state previous_state;

	nvgpu_log_fn(g, " ");

	/* Get status but keep timer running */
	nvgpu_spinlock_acquire(&wdt->lock);
	previous_state = wdt->ch_state;
	nvgpu_spinlock_release(&wdt->lock);

	if (nvgpu_memcmp((const u8 *)state,
			(const u8 *)&previous_state,
			sizeof(*state)) != 0) {
		/* Channel has advanced, timer keeps going but resets */
		nvgpu_channel_wdt_rewind(wdt, state);
		return false;
	}

	if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
		/* Seems stuck but waiting to time out */
		return false;
	}

	return true;
}

/**
 * Test if the per-channel watchdog is on; check the timeout in that case.
 *
 * Each channel has an expiration time based watchdog. The timer is
 * (re)initialized in two situations: when a new job is submitted on an idle
 * channel and when the timeout is checked but progress is detected. The
 * watchdog timeout limit is a coarse sliding window.
 *
 * The timeout is stopped (disabled) after the last job in a row finishes
 * and marks the channel idle.
 */
bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	bool running;

	nvgpu_spinlock_acquire(&wdt->lock);
	running = wdt->running;
	nvgpu_spinlock_release(&wdt->lock);

	if (running) {
		return nvgpu_channel_wdt_handler(wdt, state);
	} else {
		return false;
	}
}
1195
drivers/gpu/nvgpu/common/gr/ctx.c
Normal file
File diff suppressed because it is too large
183
drivers/gpu/nvgpu/common/gr/ctx_priv.h
Normal file
@@ -0,0 +1,183 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GR_CTX_PRIV_H
#define NVGPU_GR_CTX_PRIV_H

struct nvgpu_mem;

/**
 * Patch context buffer descriptor structure.
 *
 * Pointer to this structure is maintained in #nvgpu_gr_ctx structure.
 */
struct patch_desc {
	/**
	 * Memory to hold patch context buffer.
	 */
	struct nvgpu_mem mem;

	/**
	 * Count of entries written into patch context buffer.
	 */
	u32 data_count;
};

#ifdef CONFIG_NVGPU_GRAPHICS
struct zcull_ctx_desc {
	u64 gpu_va;
	u32 ctx_sw_mode;
};
#endif

#ifdef CONFIG_NVGPU_DEBUGGER
struct pm_ctx_desc {
	struct nvgpu_mem mem;
	u64 gpu_va;
	u32 pm_mode;
};
#endif

/**
 * GR context descriptor structure.
 *
 * This structure stores various properties of all GR context buffers.
 */
struct nvgpu_gr_ctx_desc {
	/**
	 * Array to store all GR context buffer sizes.
	 */
	u32 size[NVGPU_GR_CTX_COUNT];

#ifdef CONFIG_NVGPU_GRAPHICS
	bool force_preemption_gfxp;
#endif

#ifdef CONFIG_NVGPU_CILP
	bool force_preemption_cilp;
#endif

#ifdef CONFIG_DEBUG_FS
	bool dump_ctxsw_stats_on_channel_close;
#endif
};

/**
 * Graphics context buffer structure.
 *
 * This structure stores all the properties of a graphics context
 * buffer. One graphics context is allocated per GPU Time Slice
 * Group (TSG).
 */
struct nvgpu_gr_ctx {
	/**
	 * Context ID read from graphics context buffer.
	 */
	u32 ctx_id;

	/**
	 * Flag to indicate if above context ID is valid or not.
	 */
	bool ctx_id_valid;

	/**
	 * Memory to hold graphics context buffer.
	 */
	struct nvgpu_mem mem;

#ifdef CONFIG_NVGPU_GFXP
	struct nvgpu_mem preempt_ctxsw_buffer;
	struct nvgpu_mem spill_ctxsw_buffer;
	struct nvgpu_mem betacb_ctxsw_buffer;
	struct nvgpu_mem pagepool_ctxsw_buffer;
	struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer;
#endif

	/**
	 * Patch context buffer descriptor struct.
	 */
	struct patch_desc patch_ctx;

#ifdef CONFIG_NVGPU_GRAPHICS
	struct zcull_ctx_desc zcull_ctx;
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
	struct pm_ctx_desc pm_ctx;
#endif

	/**
	 * Graphics preemption mode of the graphics context.
	 */
	u32 graphics_preempt_mode;

	/**
	 * Compute preemption mode of the graphics context.
	 */
	u32 compute_preempt_mode;

#ifdef CONFIG_NVGPU_NON_FUSA
	bool golden_img_loaded;
#endif

#ifdef CONFIG_NVGPU_CILP
	bool cilp_preempt_pending;
#endif

#ifdef CONFIG_NVGPU_DEBUGGER
	bool boosted_ctx;
#endif

	/**
	 * Array to store GPU virtual addresses of all global context
	 * buffers.
	 */
	u64 global_ctx_buffer_va[NVGPU_GR_CTX_VA_COUNT];

	/**
	 * Array to store indexes of global context buffers
	 * corresponding to GPU virtual addresses above.
	 */
	u32 global_ctx_buffer_index[NVGPU_GR_CTX_VA_COUNT];

	/**
	 * Flag to indicate if global context buffers are mapped and
	 * #global_ctx_buffer_va array is populated.
	 */
	bool global_ctx_buffer_mapped;

	/**
	 * TSG identifier corresponding to the graphics context.
	 */
	u32 tsgid;

#ifdef CONFIG_NVGPU_SM_DIVERSITY
	/**
	 * SM diversity configuration offset. Valid only when
	 * NVGPU_SUPPORT_SM_DIVERSITY is enabled; otherwise this field is
	 * ignored. A valid offset ranges from 0 to
	 * (#gk20a.max_sm_diversity_config_count - 1).
	 */
	u32 sm_diversity_config;
#endif
};

#endif /* NVGPU_GR_CTX_PRIV_H */
700
drivers/gpu/nvgpu/common/gr/fecs_trace.c
Normal file
@@ -0,0 +1,700 @@
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/log2.h>
|
||||
#include <nvgpu/mm.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/gr/global_ctx.h>
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/gr/subctx.h>
|
||||
#include <nvgpu/gr/fecs_trace.h>
|
||||
#include <nvgpu/gr/gr_utils.h>
|
||||
|
||||
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg);
|
||||
|
||||
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
|
||||
pid_t pid, u32 vmid, struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"adding hash entry context_ptr=%x -> pid=%d, vmid=%d",
|
||||
context_ptr, pid, vmid);
|
||||
|
||||
entry = nvgpu_kzalloc(g, sizeof(*entry));
|
||||
if (entry == NULL) {
|
||||
nvgpu_err(g,
|
||||
"can't alloc new entry for context_ptr=%x pid=%d vmid=%d",
|
||||
context_ptr, pid, vmid);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvgpu_init_list_node(&entry->entry);
|
||||
entry->context_ptr = context_ptr;
|
||||
entry->pid = pid;
|
||||
entry->vmid = vmid;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_add_tail(&entry->entry, list);
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
|
||||
struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"freeing entry context_ptr=%x", context_ptr);
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry_safe(entry, tmp, list,
|
||||
nvgpu_fecs_trace_context_entry, entry) {
|
||||
if (entry->context_ptr == context_ptr) {
|
||||
nvgpu_list_del(&entry->entry);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"freed entry=%p context_ptr=%x", entry,
|
||||
entry->context_ptr);
|
||||
nvgpu_kfree(g, entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
|
||||
struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry_safe(entry, tmp, list,
|
||||
nvgpu_fecs_trace_context_entry, entry) {
|
||||
nvgpu_list_del(&entry->entry);
|
||||
nvgpu_kfree(g, entry);
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
|
||||
struct nvgpu_list_node *list, pid_t *pid, u32 *vmid)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry(entry, list, nvgpu_fecs_trace_context_entry,
|
||||
entry) {
|
||||
if (entry->context_ptr == context_ptr) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"found context_ptr=%x -> pid=%d, vmid=%d",
|
||||
entry->context_ptr, entry->pid, entry->vmid);
|
||||
*pid = entry->pid;
|
||||
*vmid = entry->vmid;
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
return;
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
|
||||
*pid = 0;
|
||||
*vmid = 0xffffffffU;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace;
|
||||
|
||||
if (!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)) {
|
||||
nvgpu_err(g, "invalid NUM_RECORDS chosen");
|
||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
trace = nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_fecs_trace));
|
||||
if (trace == NULL) {
|
||||
nvgpu_err(g, "failed to allocate fecs_trace");
|
||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
|
||||
return -ENOMEM;
|
||||
}
|
||||
g->fecs_trace = trace;
|
||||
|
||||
nvgpu_mutex_init(&trace->poll_lock);
|
||||
nvgpu_mutex_init(&trace->list_lock);
|
||||
nvgpu_mutex_init(&trace->enable_lock);
|
||||
|
||||
nvgpu_init_list_node(&trace->context_list);
|
||||
|
||||
trace->enable_count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_deinit(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
if (trace == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if tracer was enabled before attempting to stop the
|
||||
* tracer thread.
|
||||
*/
|
||||
if (trace->enable_count > 0) {
|
||||
nvgpu_thread_stop(&trace->poll_task);
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
|
||||
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);
|
||||
|
||||
nvgpu_kfree(g, g->fecs_trace);
|
||||
g->fecs_trace = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_num_ts(struct gk20a *g)
|
||||
{
|
||||
return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()
|
||||
- sizeof(struct nvgpu_fecs_trace_record)) / sizeof(u64);
|
||||
}
|
||||
|
||||
struct nvgpu_fecs_trace_record *nvgpu_gr_fecs_trace_get_record(
|
||||
struct gk20a *g, int idx)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
|
||||
nvgpu_gr_get_global_ctx_buffer_ptr(g);
|
||||
struct nvgpu_mem *mem = nvgpu_gr_global_ctx_buffer_get_mem(
|
||||
gr_global_ctx_buffer,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (mem == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (struct nvgpu_fecs_trace_record *)
|
||||
((u8 *) mem->cpu_va +
|
||||
(idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()));
|
||||
}
|
||||
|
||||
bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g,
|
||||
struct nvgpu_fecs_trace_record *r)
|
||||
{
|
||||
/*
|
||||
* testing magic_hi should suffice. magic_lo is sometimes used
|
||||
* as a sequence number in experimental ucode.
|
||||
*/
|
||||
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
|
||||
}
|
||||
|
||||
size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g)
|
||||
{
|
||||
return GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g,
|
||||
struct nvgpu_gpu_ctxsw_trace_filter *filter)
|
||||
{
|
||||
int n;
|
||||
int tag;
|
||||
|
||||
/* Compute number of entries per record, with given filter */
|
||||
for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++)
|
||||
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
|
||||
|
||||
/* Return max number of entries generated for the whole ring */
|
||||
return n * GK20A_FECS_TRACE_NUM_RECORDS;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_enable(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
int write;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->enable_lock);
|
||||
trace->enable_count++;
|
||||
|
||||
if (trace->enable_count == 1U) {
|
||||
/* drop data in hw buffer */
|
||||
if (g->ops.gr.fecs_trace.flush)
|
||||
g->ops.gr.fecs_trace.flush(g);
|
||||
|
||||
write = g->ops.gr.fecs_trace.get_write_index(g);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* For enabling FECS trace support, MAILBOX1's MSB
|
||||
* (Bit 31:31) should be set to 1. Bits 30:0 represents
|
||||
* actual pointer value.
|
||||
*/
|
||||
write = write |
|
||||
(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
|
||||
}
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g, write);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion of
|
||||
* context reset. So, pri transactions (e.g. mailbox1 register
|
||||
* write) might fail due to this. Hence, do write with ack
|
||||
* i.e. write and read it back to make sure write happened for
|
||||
* mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != write) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, write);
|
||||
}
|
||||
|
||||
err = nvgpu_thread_create(&trace->poll_task, g,
|
||||
nvgpu_gr_fecs_trace_periodic_polling, __func__);
|
||||
if (err != 0) {
|
||||
nvgpu_warn(g, "failed to create FECS polling task");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_disable(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
int read = 0;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&trace->enable_lock);
|
||||
if (trace->enable_count <= 0U) {
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace->enable_count--;
|
||||
if (trace->enable_count == 0U) {
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* For disabling FECS trace support, MAILBOX1's MSB
|
||||
* (Bit 31:31) should be set to 0.
|
||||
*/
|
||||
read = g->ops.gr.fecs_trace.get_read_index(g) &
|
||||
(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion
|
||||
* of context reset. So, pri transactions (e.g.
|
||||
* mailbox1 register write) might fail due to this.
|
||||
* Hence, do write with ack i.e. write and read it back
|
||||
* to make sure write happened for mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
}
|
||||
}
|
||||
nvgpu_thread_stop(&trace->poll_task);
|
||||
}
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
return (trace && (trace->enable_count > 0));
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g,
|
||||
g->ops.gr.fecs_trace.get_write_index(g));
|
||||
}
|
||||
|
||||
/*
|
||||
* Converts HW entry format to userspace-facing format and pushes it to the
|
||||
* queue.
|
||||
*/
|
||||
int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
|
||||
u32 *vm_update_mask)
|
||||
{
|
||||
int i;
|
||||
struct nvgpu_gpu_ctxsw_trace_entry entry = { };
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
pid_t cur_pid = 0, new_pid = 0;
|
||||
u32 cur_vmid = 0U, new_vmid = 0U;
|
||||
u32 vmid = 0U;
|
||||
int count = 0;
|
||||
|
||||
struct nvgpu_fecs_trace_record *r =
|
||||
nvgpu_gr_fecs_trace_get_record(g, index);
|
||||
if (r == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"consuming record trace=%p read=%d record=%p", trace, index, r);
|
||||
|
||||
if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
|
||||
nvgpu_warn(g,
|
||||
"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
|
||||
trace, index, r, r->magic_lo, r->magic_hi);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * Clear magic_hi to detect cases where the CPU could read the write index
 * before the FECS record is actually written to DRAM. This should not
 * happen, as we force FECS writes to SYSMEM by reading through PRAMIN.
 */
r->magic_hi = 0;
|
||||
|
||||
if ((r->context_ptr != 0U) && (r->context_id != 0U)) {
|
||||
nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr,
|
||||
&trace->context_list, &cur_pid, &cur_vmid);
|
||||
} else {
|
||||
cur_vmid = 0xffffffffU;
|
||||
cur_pid = 0;
|
||||
}
|
||||
|
||||
if (r->new_context_ptr != 0U) {
|
||||
nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr,
|
||||
&trace->context_list, &new_pid, &new_vmid);
|
||||
} else {
|
||||
new_vmid = 0xffffffffU;
|
||||
new_pid = 0;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"context_ptr=%x (vmid=%u pid=%d)",
|
||||
r->context_ptr, cur_vmid, cur_pid);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"new_context_ptr=%x (vmid=%u pid=%d)",
|
||||
r->new_context_ptr, new_vmid, new_pid);
|
||||
|
||||
entry.context_id = r->context_id;
|
||||
|
||||
/* break out FECS record into trace events */
|
||||
for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
|
||||
|
||||
entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
|
||||
entry.timestamp =
|
||||
g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
|
||||
entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
|
||||
entry.tag, entry.timestamp, r->context_id,
|
||||
r->new_context_id);
|
||||
|
||||
switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
|
||||
case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
|
||||
case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
|
||||
entry.context_id = r->new_context_id;
|
||||
entry.pid = new_pid;
|
||||
entry.vmid = new_vmid;
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
|
||||
case NVGPU_GPU_CTXSW_TAG_SAVE_END:
|
||||
entry.context_id = r->context_id;
|
||||
entry.pid = cur_pid;
|
||||
entry.vmid = cur_vmid;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* tags are not guaranteed to start at the beginning */
|
||||
if ((entry.tag != 0) && (entry.tag !=
|
||||
NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) {
|
||||
nvgpu_warn(g, "TAG not found");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
|
||||
entry.tag, entry.context_id, entry.pid);
|
||||
|
||||
if (!entry.context_id)
|
||||
continue;
|
||||
|
||||
if (g->ops.gr.fecs_trace.vm_dev_write != NULL) {
|
||||
g->ops.gr.fecs_trace.vm_dev_write(g, entry.vmid,
|
||||
vm_update_mask, &entry);
|
||||
} else {
|
||||
nvgpu_gr_fecs_trace_write_entry(g, &entry);
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_wake_up(g, vmid);
|
||||
return count;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_poll(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
u32 vm_update_mask = 0U;
|
||||
int read = 0;
|
||||
int write = 0;
|
||||
int cnt;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->poll_lock);
|
||||
if (trace->enable_count == 0) {
|
||||
goto done_unlock;
|
||||
}
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err) {
|
||||
goto done_unlock;
|
||||
}
|
||||
|
||||
write = g->ops.gr.fecs_trace.get_write_index(g);
|
||||
if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) {
|
||||
nvgpu_err(g,
|
||||
"failed to acquire write index, write=%d", write);
|
||||
err = write;
|
||||
goto done;
|
||||
}
|
||||
|
||||
read = g->ops.gr.fecs_trace.get_read_index(g);
|
||||
|
||||
cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
if (!cnt)
|
||||
goto done;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
|
||||
read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt);
|
||||
|
||||
/* Ensure all FECS writes have made it to SYSMEM */
|
||||
err = g->ops.mm.cache.fb_flush(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "mm.cache.fb_flush() failed err=%d", err);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/* Bits 30:0 of MAILBOX1 represents actual read pointer value */
|
||||
read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
|
||||
}
|
||||
|
||||
while (read != write) {
|
||||
cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask);
|
||||
if (cnt <= 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Get to next record. */
|
||||
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* In the next step, read pointer is going to be updated.
|
||||
* So, MSB of read pointer should be set back to 1. This will
|
||||
* keep FECS trace enabled.
|
||||
*/
|
||||
read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
|
||||
}
|
||||
|
||||
/* ensure FECS records has been updated before incrementing read index */
|
||||
nvgpu_wmb();
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion of context
|
||||
* reset. So, pri transactions (e.g. mailbox1 register write) might
|
||||
* fail due to this. Hence, do write with ack i.e. write and read
|
||||
* it back to make sure write happened for mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
}
|
||||
|
||||
if (g->ops.gr.fecs_trace.vm_dev_update) {
|
||||
g->ops.gr.fecs_trace.vm_dev_update(g, vm_update_mask);
|
||||
}
|
||||
|
||||
done:
|
||||
gk20a_idle(g);
|
||||
done_unlock:
|
||||
nvgpu_mutex_release(&trace->poll_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg)
|
||||
{
|
||||
struct gk20a *g = (struct gk20a *)arg;
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "thread running");
|
||||
|
||||
while (!nvgpu_thread_should_stop(&trace->poll_task) &&
|
||||
trace->enable_count > 0U) {
|
||||
|
||||
nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
|
||||
GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U);
|
||||
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
||||
|
||||
if (!g->ops.gr.fecs_trace.is_enabled(g))
|
||||
return 0;
|
||||
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
return g->ops.gr.fecs_trace.set_read_index(g, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* map global circ_buf to the context space and store the GPU VA
|
||||
* in the context header.
|
||||
*/
|
||||
int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx,
|
||||
struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid)
|
||||
{
|
||||
u64 addr = 0ULL;
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_mem *mem;
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
|
||||
nvgpu_gr_get_global_ctx_buffer_ptr(g);
|
||||
u32 context_ptr;
|
||||
u32 aperture_mask;
|
||||
int ret;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"pid=%d context_ptr=%x inst_block=%llx",
|
||||
pid, context_ptr,
|
||||
nvgpu_inst_block_addr(g, inst_block));
|
||||
|
||||
mem = nvgpu_gr_global_ctx_buffer_get_mem(gr_global_ctx_buffer,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (mem == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
|
||||
aperture_mask = 0;
|
||||
} else {
|
||||
addr = nvgpu_inst_block_addr(g, mem);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
|
||||
aperture_mask =
|
||||
g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem);
|
||||
}
|
||||
if (addr == 0ULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA) && subctx != NULL) {
|
||||
mem = nvgpu_gr_subctx_get_ctx_header(subctx);
|
||||
}
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask);
|
||||
|
||||
ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, pid, vmid,
|
||||
&trace->context_list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
u32 context_ptr;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"context_ptr=%x", context_ptr);
|
||||
|
||||
if (g->ops.gr.fecs_trace.is_enabled(g)) {
|
||||
if (g->ops.gr.fecs_trace.flush) {
|
||||
g->ops.gr.fecs_trace.flush(g);
|
||||
}
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
|
||||
&trace->context_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
196
drivers/gpu/nvgpu/common/gr/fs_state.c
Normal file
@@ -0,0 +1,196 @@
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/fs_state.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/grmgr.h>
|
||||
|
||||
static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
int err;
|
||||
u32 *tpc_sm_id;
|
||||
u32 sm_id_size = g->ops.gr.init.get_sm_id_size();
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
tpc_sm_id = nvgpu_kcalloc(g, sm_id_size, sizeof(u32));
|
||||
if (tpc_sm_id == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
err = g->ops.gr.init.sm_id_config(g, tpc_sm_id, config, NULL, false);
|
||||
|
||||
nvgpu_kfree(g, tpc_sm_id);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return err;
|
||||
}
|
||||
|
||||
static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_tpc_mask = 0;
|
||||
u32 gpc, pes;
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_TPC_PER_GPC);
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
u32 fuse_tpc_mask;
|
||||
u32 val;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
|
||||
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
|
||||
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
|
||||
for (pes = 0;
|
||||
pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
|
||||
pes++) {
|
||||
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
|
||||
config, gpc, pes) <<
|
||||
nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and before chips logical gpc-id
|
||||
* is same as physical gpc-id for non-floorswept config but for
|
||||
* chips after tu104 it may not be true.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(g->tpc_fs_mask_user != fuse_tpc_mask)) {
|
||||
if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
|
||||
U32(1))) {
|
||||
val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
|
||||
/*
|
||||
* skip tpc to disable the other tpc cause channel
|
||||
* timeout
|
||||
*/
|
||||
val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
|
||||
pes_tpc_mask = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
|
||||
}
|
||||
|
||||
int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 tpc_index, gpc_index;
|
||||
u32 sm_id = 0;
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 fuse_tpc_mask;
|
||||
u32 max_tpc_cnt;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
u32 gpc_cnt, tpc_cnt;
|
||||
u32 num_sm;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
g->ops.gr.init.fs_state(g);
|
||||
|
||||
err = g->ops.gr.config.init_sm_id_table(g, config);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
num_sm = nvgpu_gr_config_get_no_of_sm(config);
|
||||
nvgpu_assert(num_sm > 0U);
|
||||
|
||||
for (sm_id = 0; sm_id < num_sm; sm_id++) {
|
||||
struct nvgpu_sm_info *sm_info =
|
||||
nvgpu_gr_config_get_sm_info(config, sm_id);
|
||||
tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info);
|
||||
gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info);
|
||||
|
||||
g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id,
|
||||
config, NULL, false);
|
||||
}
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
g->ops.gr.init.pd_tpc_per_gpc(g, config);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/* gr_setup_pd_mapping */
|
||||
g->ops.gr.init.rop_mapping(g, config);
|
||||
g->ops.gr.init.pd_skip_table_gpc(g, config);
|
||||
}
|
||||
#endif
|
||||
|
||||
gpc_cnt = nvgpu_gr_config_get_gpc_count(config);
|
||||
tpc_cnt = nvgpu_gr_config_get_tpc_count(config);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and earlier chips, the logical gpc-id
* is the same as the physical gpc-id in a non-floorswept config, but
* for chips after tu104 this may not hold.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(fuse_tpc_mask ==
|
||||
nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1)))) {
|
||||
u32 val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1));
|
||||
tpc_cnt = (u32)hweight32(val);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
|
||||
|
||||
gr_load_tpc_mask(g, config);
|
||||
|
||||
err = gr_load_sm_id_config(g, config);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "load_smid_config failed err=%d", err);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return err;
|
||||
}
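/*
 * Editor's illustrative sketch (not part of the nvgpu sources): the
 * tpc_fs_mask_user handling in nvgpu_gr_fs_state_init() and
 * gr_load_tpc_mask() above reduces to popcount arithmetic. Assuming a
 * hypothetical max_tpc_count of 4 and a user mask of 0xB, the driver ends
 * up with 3 TPCs and a contiguous mask of 0x7. __builtin_popcount() stands
 * in here for the kernel's hweight32().
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t max_tpc_count = 4;			/* hypothetical */
	const uint32_t fuse_tpc_mask = (1u << max_tpc_count) - 1u; /* no HW floorsweeping */
	const uint32_t tpc_fs_mask_user = 0xB;			/* user keeps TPC0,1,3 */
	uint32_t tpc_cnt = max_tpc_count;
	uint32_t tpc_mask = fuse_tpc_mask;

	if ((tpc_fs_mask_user != 0u) && (tpc_fs_mask_user != fuse_tpc_mask) &&
	    (fuse_tpc_mask == (1u << max_tpc_count) - 1u)) {
		uint32_t val = tpc_fs_mask_user & ((1u << max_tpc_count) - 1u);

		/* keep popcount(user mask) TPCs ... */
		tpc_cnt = (uint32_t)__builtin_popcount(val);
		/* ... and program a contiguous mask of that many TPCs */
		tpc_mask = (1u << tpc_cnt) - 1u;
	}

	printf("tpc_cnt=%u tpc_mask=0x%x\n", tpc_cnt, tpc_mask);	/* 3, 0x7 */
	return 0;
}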
477
drivers/gpu/nvgpu/common/gr/global_ctx.c
Normal file
@@ -0,0 +1,477 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#include <nvgpu/string.h>
|
||||
#endif
|
||||
|
||||
#include <nvgpu/gr/global_ctx.h>
|
||||
|
||||
#include "global_ctx_priv.h"
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
#include <nvgpu/posix/posix-fault-injection.h>
|
||||
|
||||
struct nvgpu_posix_fault_inj *nvgpu_golden_ctx_verif_get_fault_injection(void)
|
||||
{
|
||||
struct nvgpu_posix_fault_inj_container *c =
|
||||
nvgpu_posix_fault_injection_get_container();
|
||||
|
||||
return &c->golden_ctx_verif_fi;
|
||||
}
|
||||
|
||||
struct nvgpu_posix_fault_inj *nvgpu_local_golden_image_get_fault_injection(void)
|
||||
{
|
||||
struct nvgpu_posix_fault_inj_container *c =
|
||||
nvgpu_posix_fault_injection_get_container();
|
||||
|
||||
return &c->local_golden_image_fi;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *
|
||||
nvgpu_gr_global_ctx_desc_alloc(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc =
|
||||
nvgpu_kzalloc(g, sizeof(*desc) *
|
||||
U64(NVGPU_GR_GLOBAL_CTX_COUNT));
|
||||
return desc;
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_desc_free(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
nvgpu_kfree(g, desc);
|
||||
}
|
||||
|
||||
|
||||
void nvgpu_gr_global_ctx_set_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index, size_t size)
|
||||
{
|
||||
nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_COUNT);
|
||||
desc[index].size = size;
|
||||
}
|
||||
|
||||
size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
return desc[index].size;
|
||||
}
|
||||
|
||||
static void nvgpu_gr_global_ctx_buffer_destroy(struct gk20a *g,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
nvgpu_dma_free(g, mem);
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
if (desc == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) {
|
||||
if (desc[i].destroy != NULL) {
|
||||
desc[i].destroy(g, &desc[i].mem);
|
||||
desc[i].destroy = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
|
||||
static int nvgpu_gr_global_ctx_buffer_alloc_sys(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_dma_alloc_sys(g, desc[index].size,
|
||||
&desc[index].mem);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
desc[index].destroy = nvgpu_gr_global_ctx_buffer_destroy;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
static int nvgpu_gr_global_ctx_buffer_alloc_vpr(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (g->ops.secure_alloc != NULL) {
|
||||
err = g->ops.secure_alloc(g,
|
||||
&desc[index].mem, desc[index].size,
|
||||
&desc[index].destroy);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool nvgpu_gr_global_ctx_buffer_sizes_are_valid(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].size == 0U) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE].size == 0U)) {
|
||||
return false;
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR].size == 0U)) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
static int nvgpu_gr_global_ctx_buffer_vpr_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* MIG supports only compute class.
|
||||
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
|
||||
* if 2D/3D/I2M classes (graphics) are supported.
|
||||
*/
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_mig,
|
||||
"2D class is not supported "
|
||||
"skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB "
|
||||
"and RTV_CB");
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int nvgpu_gr_global_ctx_buffer_sys_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* MIG supports only compute class.
|
||||
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
|
||||
* if 2D/3D/I2M classes (graphics) are supported.
|
||||
*/
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_CIRCULAR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (nvgpu_gr_global_ctx_buffer_sizes_are_valid(g, desc) != true) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_sys_alloc(g, desc);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_FECS_TRACE
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].size != 0U) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].size != 0U) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
if (nvgpu_gr_global_ctx_buffer_vpr_alloc(g, desc) != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
#endif
|
||||
|
||||
return err;
|
||||
|
||||
clean_up:
|
||||
nvgpu_gr_global_ctx_buffer_free(g, desc);
|
||||
return err;
|
||||
}
|
||||
|
||||
u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index,
|
||||
struct vm_gk20a *vm, u32 flags, bool priv)
|
||||
{
|
||||
u64 gpu_va;
|
||||
|
||||
if (!nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, desc[index].mem.size,
|
||||
flags, gk20a_mem_flag_none, priv,
|
||||
desc[index].mem.aperture);
|
||||
return gpu_va;
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_buffer_unmap(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index,
|
||||
struct vm_gk20a *vm, u64 gpu_va)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
nvgpu_gmmu_unmap(vm, &desc[index].mem, gpu_va);
|
||||
}
|
||||
}
|
||||
|
||||
struct nvgpu_mem *nvgpu_gr_global_ctx_buffer_get_mem(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return &desc[index].mem;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool nvgpu_gr_global_ctx_buffer_ready(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *
|
||||
nvgpu_gr_global_ctx_init_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_mem *source_mem, size_t size)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
if (nvgpu_posix_fault_injection_handle_call(
|
||||
nvgpu_local_golden_image_get_fault_injection())) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
local_golden_image = nvgpu_kzalloc(g, sizeof(*local_golden_image));
|
||||
if (local_golden_image == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_golden_image->context = nvgpu_vzalloc(g, size);
|
||||
if (local_golden_image->context == NULL) {
|
||||
nvgpu_kfree(g, local_golden_image);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_golden_image->size = size;
|
||||
|
||||
nvgpu_mem_rd_n(g, source_mem, 0, local_golden_image->context,
|
||||
nvgpu_safe_cast_u64_to_u32(size));
|
||||
|
||||
return local_golden_image;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
bool nvgpu_gr_global_ctx_compare_golden_images(struct gk20a *g,
|
||||
bool is_sysmem,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image1,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image2,
|
||||
size_t size)
|
||||
{
|
||||
bool is_identical = true;
|
||||
u32 *data1 = local_golden_image1->context;
|
||||
u32 *data2 = local_golden_image2->context;
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
u32 i;
|
||||
#endif
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
if (nvgpu_posix_fault_injection_handle_call(
|
||||
nvgpu_golden_ctx_verif_get_fault_injection())) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* In case of sysmem, direct mem compare can be used.
|
||||
* For vidmem, only a word-by-word comparison works, and
* it is too early to use the CE engine for read operations.
|
||||
*/
|
||||
if (is_sysmem) {
|
||||
if (nvgpu_memcmp((u8 *)data1, (u8 *)data2, size) != 0) {
|
||||
is_identical = false;
|
||||
}
|
||||
} else {
#ifdef CONFIG_NVGPU_DGPU
for (i = 0U; i < nvgpu_safe_cast_u64_to_u32(size/sizeof(u32));
i = nvgpu_safe_add_u32(i, 1U)) {
|
||||
if (*(data1 + i) != *(data2 + i)) {
|
||||
is_identical = false;
|
||||
nvgpu_log_info(g,
|
||||
"mismatch i = %u golden1: %u golden2 %u",
|
||||
i, *(data1 + i), *(data2 + i));
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
is_identical = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "%s result %u", __func__, is_identical);
|
||||
return is_identical;
|
||||
}
|
||||
#endif
|
||||
|
||||
void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
|
||||
struct nvgpu_mem *target_mem)
|
||||
{
|
||||
/*
* Channel gr_ctx buffer is gpu cacheable.
* Flush and invalidate before cpu update.
*/
|
||||
if (g->ops.mm.cache.l2_flush(g, true) != 0) {
|
||||
nvgpu_err(g, "l2_flush failed");
|
||||
}
|
||||
|
||||
nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
|
||||
nvgpu_safe_cast_u64_to_u32(local_golden_image->size));
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "loaded saved golden image into gr_ctx");
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_deinit_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
|
||||
{
|
||||
nvgpu_vfree(g, local_golden_image->context);
|
||||
nvgpu_kfree(g, local_golden_image);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
u32 *nvgpu_gr_global_ctx_get_local_golden_image_ptr(
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
|
||||
{
|
||||
return local_golden_image->context;
|
||||
}
|
||||
#endif
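/*
 * Editor's illustrative sketch (not part of the nvgpu sources): the global
 * context buffer code above keeps an array of descriptors, each pairing a
 * size with a destroy callback that is set only once the buffer is actually
 * allocated, so nvgpu_gr_global_ctx_buffer_free() releases exactly what
 * exists. A minimal standalone model of that pattern, with made-up sizes:
 */
#include <stdio.h>
#include <stdlib.h>

#define CTX_COUNT 3	/* hypothetical buffer count */

struct ctx_desc {
	size_t size;
	void *mem;
	void (*destroy)(struct ctx_desc *d);	/* set only when allocated */
};

static void ctx_destroy(struct ctx_desc *d)
{
	free(d->mem);
	d->mem = NULL;
}

static int ctx_alloc(struct ctx_desc *d)
{
	if (d->mem != NULL)
		return 0;		/* already allocated, nothing to do */
	d->mem = malloc(d->size);
	if (d->mem == NULL)
		return -1;
	d->destroy = ctx_destroy;	/* remember how to free this buffer */
	return 0;
}

static void ctx_free_all(struct ctx_desc *desc)
{
	for (int i = 0; i < CTX_COUNT; i++) {
		if (desc[i].destroy != NULL) {
			desc[i].destroy(&desc[i]);
			desc[i].destroy = NULL;
		}
	}
}

int main(void)
{
	struct ctx_desc desc[CTX_COUNT] = {
		{ .size = 4096 }, { .size = 8192 }, { .size = 0 },
	};

	for (int i = 0; i < CTX_COUNT; i++) {
		if (desc[i].size != 0 && ctx_alloc(&desc[i]) != 0) {
			ctx_free_all(desc);	/* partial failure: free what exists */
			return 1;
		}
	}
	printf("buffers with a nonzero size were allocated, the rest skipped\n");
	ctx_free_all(desc);
	return 0;
}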
68
drivers/gpu/nvgpu/common/gr/global_ctx_priv.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_GLOBAL_CTX_PRIV_H
|
||||
#define NVGPU_GR_GLOBAL_CTX_PRIV_H
|
||||
|
||||
/**
|
||||
* Global context buffer descriptor structure.
|
||||
*
|
||||
* This structure stores properties applicable to each global
|
||||
* context buffer.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_buffer_desc {
|
||||
/**
|
||||
* Memory to hold global context buffer.
|
||||
*/
|
||||
struct nvgpu_mem mem;
|
||||
|
||||
/**
|
||||
* Size of global context buffer.
|
||||
*/
|
||||
size_t size;
|
||||
|
||||
/**
|
||||
* Function pointer to free global context buffer.
|
||||
*/
|
||||
global_ctx_mem_destroy_fn destroy;
|
||||
};
|
||||
|
||||
/**
|
||||
* Local Golden context image descriptor structure.
|
||||
*
|
||||
* This structure stores details of a local Golden context image.
|
||||
* Pointer to this struct is maintained in
|
||||
* #nvgpu_gr_obj_ctx_golden_image structure.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_local_golden_image {
|
||||
/**
|
||||
* Pointer to local Golden context image memory.
|
||||
*/
|
||||
u32 *context;
|
||||
|
||||
/**
|
||||
* Size of local Golden context image.
|
||||
*/
|
||||
size_t size;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_GLOBAL_CTX_PRIV_H */
1204
drivers/gpu/nvgpu/common/gr/gr.c
Normal file
File diff suppressed because it is too large
864
drivers/gpu/nvgpu/common/gr/gr_config.c
Normal file
@@ -0,0 +1,864 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/grmgr.h>
|
||||
|
||||
#include "gr_config_priv.h"
|
||||
|
||||
static void gr_config_init_pes_tpc(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
u32 pes_index;
|
||||
u32 pes_tpc_mask;
|
||||
u32 pes_tpc_count;
|
||||
|
||||
for (pes_index = 0; pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
|
||||
config, gpc_index, pes_index);
|
||||
pes_tpc_count = hweight32(pes_tpc_mask);
|
||||
|
||||
/* detect PES presence by seeing if there are
|
||||
* TPCs connected to it.
|
||||
*/
|
||||
if (pes_tpc_count != 0U) {
|
||||
config->gpc_ppc_count[gpc_index] = nvgpu_safe_add_u32(
|
||||
config->gpc_ppc_count[gpc_index], 1U);
|
||||
}
|
||||
|
||||
config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
|
||||
config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
|
||||
}
|
||||
}
|
||||
|
||||
static void gr_config_init_gpc_skip_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
u32 pes_heavy_index;
|
||||
u32 gpc_new_skip_mask = 0U;
|
||||
u32 pes_tpc_cnt = 0U, pes_tpc_mask = 0U;
|
||||
|
||||
if (config->pe_count_per_gpc <= 1U) {
|
||||
goto skip_mask_end;
|
||||
}
|
||||
|
||||
pes_tpc_cnt = nvgpu_safe_add_u32(
|
||||
config->pes_tpc_count[0][gpc_index],
|
||||
config->pes_tpc_count[1][gpc_index]);
|
||||
|
||||
pes_heavy_index =
|
||||
(config->pes_tpc_count[0][gpc_index] >
|
||||
config->pes_tpc_count[1][gpc_index]) ? 0U : 1U;
|
||||
|
||||
if ((pes_tpc_cnt == 5U) || ((pes_tpc_cnt == 4U) &&
|
||||
(config->pes_tpc_count[0][gpc_index] !=
|
||||
config->pes_tpc_count[1][gpc_index]))) {
|
||||
pes_tpc_mask = nvgpu_safe_sub_u32(
|
||||
config->pes_tpc_mask[pes_heavy_index][gpc_index], 1U);
|
||||
gpc_new_skip_mask =
|
||||
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
|
||||
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
|
||||
pes_tpc_mask);
|
||||
}
|
||||
|
||||
skip_mask_end:
|
||||
config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
|
||||
}
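/*
 * Editor's illustrative sketch (not part of the nvgpu sources): for an
 * imbalanced GPC, gr_config_init_gpc_skip_mask() above isolates the lowest
 * set bit of the heavier PES's TPC mask via mask ^ (mask & (mask - 1)),
 * marking a single TPC to skip. Worked through with hypothetical counts of
 * 3 and 2 TPCs on the two PES units:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical GPC: PES0 drives 3 TPCs (mask 0x07), PES1 drives 2 (0x18) */
	uint32_t pes_tpc_count[2] = { 3, 2 };
	uint32_t pes_tpc_mask[2]  = { 0x07, 0x18 };
	uint32_t skip_mask = 0;

	uint32_t total = pes_tpc_count[0] + pes_tpc_count[1];
	uint32_t heavy = (pes_tpc_count[0] > pes_tpc_count[1]) ? 0 : 1;

	if ((total == 5) || ((total == 4) &&
	    (pes_tpc_count[0] != pes_tpc_count[1]))) {
		uint32_t m = pes_tpc_mask[heavy];

		/* m ^ (m & (m - 1)) keeps only the lowest set bit of m */
		skip_mask = m ^ (m & (m - 1));
	}

	printf("gpc_skip_mask = 0x%x\n", skip_mask);	/* 0x1: skip TPC0 on PES0 */
	return 0;
}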
|
||||
|
||||
static void gr_config_log_info(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 gpc_index, pes_index;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_gpc_count: %d", config->max_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_count: %d", config->gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_mask: 0x%x", config->gpc_mask);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_count: %d", config->max_tpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "tpc_count: %d", config->tpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "sm_count_per_tpc: %d", config->sm_count_per_tpc);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "zcb_count: %d", config->zcb_count);
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "ppc_count: %d", config->ppc_count);
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_count[%d] : %d",
|
||||
gpc_index, config->gpc_tpc_count[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_mask[%d] : 0x%x",
|
||||
gpc_index, config->gpc_tpc_mask[gpc_index]);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_zcb_count[%d] : %d",
|
||||
gpc_index, config->gpc_zcb_count != NULL ?
|
||||
config->gpc_zcb_count[gpc_index] : 0U);
|
||||
}
|
||||
#endif
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_ppc_count[%d] : %d",
|
||||
gpc_index, config->gpc_ppc_count[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_skip_mask[%d] : 0x%x",
|
||||
gpc_index, config->gpc_skip_mask[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
for (pes_index = 0;
|
||||
pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_count[%d][%d] : %d",
|
||||
pes_index, gpc_index,
|
||||
config->pes_tpc_count[pes_index][gpc_index]);
|
||||
}
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
for (pes_index = 0;
|
||||
pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_mask[%d][%d] : 0x%x",
|
||||
pes_index, gpc_index,
|
||||
config->pes_tpc_mask[pes_index][gpc_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void gr_config_set_gpc_mask(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (g->ops.gr.config.get_gpc_mask != NULL) {
|
||||
config->gpc_mask = g->ops.gr.config.get_gpc_mask(g);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count),
|
||||
1U);
|
||||
}
|
||||
}
|
||||
|
||||
static bool gr_config_alloc_valid(struct nvgpu_gr_config *config)
|
||||
{
|
||||
if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
|
||||
(config->gpc_ppc_count == NULL) ||
|
||||
(config->gpc_skip_mask == NULL)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(config->g, NVGPU_SUPPORT_MIG) &&
|
||||
(config->gpc_zcb_count == NULL)) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gr_config_free_mem(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_index;
|
||||
|
||||
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
|
||||
nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
|
||||
nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, config->gpc_skip_mask);
|
||||
nvgpu_kfree(g, config->gpc_ppc_count);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_kfree(g, config->gpc_zcb_count);
|
||||
#endif
|
||||
nvgpu_kfree(g, config->gpc_tpc_mask);
|
||||
nvgpu_kfree(g, config->gpc_tpc_count);
|
||||
}
|
||||
|
||||
static bool gr_config_alloc_struct_mem(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_index;
|
||||
u32 total_tpc_cnt;
|
||||
size_t sm_info_size;
|
||||
size_t gpc_size, sm_size, max_gpc_cnt;
|
||||
size_t pd_tbl_size;
|
||||
|
||||
total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count,
|
||||
config->max_tpc_per_gpc_count);
|
||||
sm_size = nvgpu_safe_mult_u64((size_t)config->sm_count_per_tpc,
|
||||
sizeof(struct nvgpu_sm_info));
|
||||
/* allocate for max tpc per gpc */
|
||||
sm_info_size = nvgpu_safe_mult_u64((size_t)total_tpc_cnt, sm_size);
|
||||
|
||||
config->sm_to_cluster = nvgpu_kzalloc(g, sm_info_size);
|
||||
if (config->sm_to_cluster == NULL) {
|
||||
nvgpu_err(g, "sm_to_cluster == NULL");
|
||||
goto alloc_err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) {
|
||||
config->sm_to_cluster_redex_config =
|
||||
nvgpu_kzalloc(g, sm_info_size);
|
||||
if (config->sm_to_cluster_redex_config == NULL) {
|
||||
nvgpu_err(g, "sm_to_cluster_redex_config == NULL");
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
config->no_of_sm = 0;
|
||||
|
||||
gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32));
|
||||
max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32));
|
||||
config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size);
|
||||
config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_ZCULL_BANKS);
|
||||
|
||||
config->gpc_zcb_count = nvgpu_kzalloc(g, gpc_size);
|
||||
}
|
||||
#endif
|
||||
config->gpc_ppc_count = nvgpu_kzalloc(g, gpc_size);
|
||||
|
||||
pd_tbl_size = nvgpu_safe_mult_u64(
|
||||
(size_t)g->ops.gr.config.get_pd_dist_skip_table_size(),
|
||||
sizeof(u32));
|
||||
pd_tbl_size = nvgpu_safe_mult_u64(pd_tbl_size, 4UL);
|
||||
config->gpc_skip_mask = nvgpu_kzalloc(g, pd_tbl_size);
|
||||
|
||||
if (gr_config_alloc_valid(config) == false) {
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
|
||||
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
|
||||
config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, gpc_size);
|
||||
config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, gpc_size);
|
||||
if ((config->pes_tpc_count[pes_index] == NULL) ||
|
||||
(config->pes_tpc_mask[pes_index] == NULL)) {
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
clean_alloc_mem:
|
||||
nvgpu_kfree(g, config->sm_to_cluster);
|
||||
config->sm_to_cluster = NULL;
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (config->sm_to_cluster_redex_config != NULL) {
|
||||
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
|
||||
config->sm_to_cluster_redex_config = NULL;
|
||||
}
|
||||
#endif
|
||||
gr_config_free_mem(g, config);
|
||||
|
||||
alloc_err:
|
||||
return false;
|
||||
}
|
||||
|
||||
static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
|
||||
{
|
||||
struct gk20a *g = config->g;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
|
||||
config->max_gpc_count = nvgpu_grmgr_get_max_gpc_count(g);
|
||||
config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
|
||||
if (config->gpc_count == 0U) {
|
||||
nvgpu_err(g, "gpc_count==0!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
config->gpc_mask = nvgpu_grmgr_get_gr_logical_gpc_mask(
|
||||
g, cur_gr_instance);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
|
||||
{
|
||||
struct gk20a *g = config->g;
|
||||
|
||||
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
|
||||
config->gpc_count = g->ops.priv_ring.get_gpc_count(g);
|
||||
if (config->gpc_count == 0U) {
|
||||
nvgpu_err(g, "gpc_count==0!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
gr_config_set_gpc_mask(g, config);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_config *config;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_index;
|
||||
u32 gpc_phys_id;
|
||||
int err;
|
||||
|
||||
config = nvgpu_kzalloc(g, sizeof(*config));
|
||||
if (config == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
config->g = g;
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
err = gr_config_init_mig_gpcs(config);
|
||||
if (err < 0) {
|
||||
nvgpu_err(g, "MIG GPC config init failed");
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
err = gr_config_init_gpcs(config);
|
||||
if (err < 0) {
|
||||
nvgpu_err(g, "GPC config init failed");
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Required to read gpc_tpc_mask below */
|
||||
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
|
||||
|
||||
config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count,
|
||||
config->max_tpc_per_gpc_count);
|
||||
|
||||
config->pe_count_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_PES_PER_GPC);
|
||||
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
|
||||
nvgpu_err(g, "too many pes per gpc");
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
config->sm_count_per_tpc =
|
||||
nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
|
||||
if (config->sm_count_per_tpc == 0U) {
|
||||
nvgpu_err(g, "sm_count_per_tpc==0!");
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
if (gr_config_alloc_struct_mem(g, config) == false) {
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and earlier chips, the logical gpc-id
* is the same as the physical gpc-id in a non-floorswept config, but
* for chips after tu104 this may not hold.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, gpc_index);
|
||||
config->gpc_tpc_mask[gpc_index] =
|
||||
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
}
|
||||
|
||||
config->ppc_count = 0;
|
||||
config->tpc_count = 0;
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
config->zcb_count = 0;
|
||||
#endif
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
config->gpc_tpc_count[gpc_index] =
|
||||
g->ops.gr.config.get_tpc_count_in_gpc(g, config,
|
||||
gpc_index);
|
||||
config->tpc_count = nvgpu_safe_add_u32(config->tpc_count,
|
||||
config->gpc_tpc_count[gpc_index]);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
config->gpc_zcb_count[gpc_index] =
|
||||
g->ops.gr.config.get_zcull_count_in_gpc(g, config,
|
||||
gpc_index);
|
||||
config->zcb_count = nvgpu_safe_add_u32(config->zcb_count,
|
||||
config->gpc_zcb_count[gpc_index]);
|
||||
}
|
||||
#endif
|
||||
|
||||
gr_config_init_pes_tpc(g, config, gpc_index);
|
||||
|
||||
config->ppc_count = nvgpu_safe_add_u32(config->ppc_count,
|
||||
config->gpc_ppc_count[gpc_index]);
|
||||
|
||||
gr_config_init_gpc_skip_mask(config, gpc_index);
|
||||
}
|
||||
|
||||
gr_config_log_info(g, config);
|
||||
return config;
|
||||
|
||||
clean_up_init:
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
static u32 prime_set[18] = {
|
||||
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
|
||||
|
||||
/*
|
||||
* Return map tiles count for given index
|
||||
* Return 0 if index is out-of-bounds
|
||||
*/
|
||||
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
|
||||
{
|
||||
if (index >= config->map_tile_count) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return config->map_tiles[index];
|
||||
}
|
||||
|
||||
u8 *nvgpu_gr_config_get_map_tiles(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->map_tiles;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->map_row_offset;
|
||||
}
|
||||
|
||||
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
s32 comm_denom;
|
||||
s32 mul_factor;
|
||||
s32 *init_frac = NULL;
|
||||
s32 *init_err = NULL;
|
||||
s32 *run_err = NULL;
|
||||
u32 *sorted_num_tpcs = NULL;
|
||||
u32 *sorted_to_unsorted_gpc_map = NULL;
|
||||
u32 gpc_index;
|
||||
u32 gpc_mark = 0;
|
||||
u32 num_tpc;
|
||||
u32 max_tpc_count = 0;
|
||||
u32 swap;
|
||||
u32 tile_count;
|
||||
u32 index;
|
||||
bool delete_map = false;
|
||||
bool gpc_sorted;
|
||||
int ret = 0;
|
||||
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
|
||||
u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
sorted_num_tpcs =
|
||||
nvgpu_kzalloc(g, (size_t)num_gpcs *
|
||||
(size_t)num_tpc_per_gpc *
|
||||
sizeof(s32));
|
||||
sorted_to_unsorted_gpc_map =
|
||||
nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
|
||||
|
||||
if (!((init_frac != NULL) &&
|
||||
(init_err != NULL) &&
|
||||
(run_err != NULL) &&
|
||||
(sorted_num_tpcs != NULL) &&
|
||||
(sorted_to_unsorted_gpc_map != NULL))) {
|
||||
ret = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
config->map_row_offset = 0xFFFFFFFFU;
|
||||
|
||||
if (config->tpc_count == 3U) {
|
||||
config->map_row_offset = 2;
|
||||
} else if (config->tpc_count < 3U) {
|
||||
config->map_row_offset = 1;
|
||||
} else {
|
||||
config->map_row_offset = 3;
|
||||
|
||||
for (index = 1U; index < 18U; index++) {
|
||||
u32 prime = prime_set[index];
|
||||
if ((config->tpc_count % prime) != 0U) {
|
||||
config->map_row_offset = prime;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (config->tpc_count) {
|
||||
case 15:
|
||||
config->map_row_offset = 6;
|
||||
break;
|
||||
case 14:
|
||||
config->map_row_offset = 5;
|
||||
break;
|
||||
case 13:
|
||||
config->map_row_offset = 2;
|
||||
break;
|
||||
case 11:
|
||||
config->map_row_offset = 7;
|
||||
break;
|
||||
case 10:
|
||||
config->map_row_offset = 6;
|
||||
break;
|
||||
case 7:
|
||||
case 5:
|
||||
config->map_row_offset = 1;
|
||||
break;
|
||||
default:
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "unsupported tpc count = %u",
|
||||
config->tpc_count);
|
||||
break;
|
||||
}
|
||||
|
||||
if (config->map_tiles != NULL) {
|
||||
if (config->map_tile_count != config->tpc_count) {
|
||||
delete_map = true;
|
||||
}
|
||||
|
||||
for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
|
||||
if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
|
||||
>= config->tpc_count) {
|
||||
delete_map = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (delete_map) {
|
||||
nvgpu_kfree(g, config->map_tiles);
|
||||
config->map_tiles = NULL;
|
||||
config->map_tile_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->map_tiles == NULL) {
|
||||
config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
|
||||
if (config->map_tiles == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
config->map_tile_count = map_tile_count;
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
|
||||
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
|
||||
}
|
||||
|
||||
gpc_sorted = false;
|
||||
while (!gpc_sorted) {
|
||||
gpc_sorted = true;
|
||||
for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
|
||||
if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
|
||||
gpc_sorted = false;
|
||||
swap = sorted_num_tpcs[gpc_index];
|
||||
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
|
||||
sorted_num_tpcs[gpc_index + 1U] = swap;
|
||||
swap = sorted_to_unsorted_gpc_map[gpc_index];
|
||||
sorted_to_unsorted_gpc_map[gpc_index] =
|
||||
sorted_to_unsorted_gpc_map[gpc_index + 1U];
|
||||
sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
|
||||
max_tpc_count = config->gpc_tpc_count[gpc_index];
|
||||
}
|
||||
}
|
||||
|
||||
mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
|
||||
if ((U32(mul_factor) & 0x1U) != 0U) {
|
||||
mul_factor = 2;
|
||||
} else {
|
||||
mul_factor = 1;
|
||||
}
|
||||
|
||||
comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
num_tpc = sorted_num_tpcs[gpc_index];
|
||||
|
||||
init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
|
||||
|
||||
if (num_tpc != 0U) {
|
||||
init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
|
||||
} else {
|
||||
init_err[gpc_index] = 0;
|
||||
}
|
||||
|
||||
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
|
||||
}
|
||||
|
||||
while (gpc_mark < config->tpc_count) {
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
if ((run_err[gpc_index] * 2) >= comm_denom) {
|
||||
config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
|
||||
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
|
||||
} else {
|
||||
run_err[gpc_index] += init_frac[gpc_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clean_up:
|
||||
nvgpu_kfree(g, init_frac);
|
||||
nvgpu_kfree(g, init_err);
|
||||
nvgpu_kfree(g, run_err);
|
||||
nvgpu_kfree(g, sorted_num_tpcs);
|
||||
nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
|
||||
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "fail");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
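/*
 * Editor's illustrative sketch (not part of the nvgpu sources): the tile
 * map built by nvgpu_gr_config_init_map_tiles() above is an error-diffusion
 * (Bresenham-style) interleave of GPC indices, weighted by each GPC's TPC
 * count. A standalone model with a made-up 2-GPC config (3 and 1 TPCs,
 * already sorted descending); it prints the tile sequence 0 0 1 0.
 */
#include <stdio.h>

int main(void)
{
	const int gpc_count = 2, max_tpc = 3, tpc_count = 4;	/* hypothetical */
	const int sorted_tpcs[2] = { 3, 1 };
	int init_frac[2], init_err[2], run_err[2];
	int tiles[8];	/* sized like num_gpcs * num_tpc_per_gpc in the driver */
	int mark = 0;

	/* mul keeps comm_denom / 2 exact; gpc_count * max_tpc is even here */
	int mul = ((gpc_count * max_tpc) & 1) ? 2 : 1;
	int comm_denom = gpc_count * max_tpc * mul;

	for (int i = 0; i < gpc_count; i++) {
		init_frac[i] = sorted_tpcs[i] * gpc_count * mul;
		init_err[i] = (sorted_tpcs[i] != 0) ?
			i * max_tpc * mul - comm_denom / 2 : 0;
		run_err[i] = init_frac[i] + init_err[i];
	}

	/* a GPC emits a tile whenever its accumulated error crosses one half */
	while (mark < tpc_count) {
		for (int i = 0; i < gpc_count; i++) {
			if (run_err[i] * 2 >= comm_denom) {
				tiles[mark++] = i;
				run_err[i] += init_frac[i] - comm_denom;
			} else {
				run_err[i] += init_frac[i];
			}
		}
	}

	for (int i = 0; i < tpc_count; i++)
		printf("%d ", tiles[i]);
	printf("\n");
	return 0;
}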
|
||||
|
||||
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_zcull_per_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->zcb_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
return config->gpc_zcb_count[gpc_index];
|
||||
}
|
||||
#endif
|
||||
|
||||
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
if (config == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
gr_config_free_mem(g, config);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_kfree(g, config->map_tiles);
|
||||
#endif
|
||||
nvgpu_kfree(g, config->sm_to_cluster);
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (config->sm_to_cluster_redex_config != NULL) {
|
||||
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
|
||||
config->sm_to_cluster_redex_config = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_tpc_per_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->ppc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->pe_count_per_gpc;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->sm_count_per_tpc;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
return config->gpc_ppc_count[gpc_index];
|
||||
}
|
||||
|
||||
u32 *nvgpu_gr_config_get_gpc_tpc_count_base(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
if (gpc_index >= config->gpc_count) {
|
||||
return 0;
|
||||
}
|
||||
return config->gpc_tpc_count[gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 pes_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
|
||||
return config->pes_tpc_count[pes_index][gpc_index];
|
||||
}
|
||||
|
||||
u32 *nvgpu_gr_config_get_gpc_tpc_mask_base(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_tpc_mask;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
return config->gpc_tpc_mask[gpc_index];
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_gpc_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 val)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
config->gpc_tpc_mask[gpc_index] = val;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
if (gpc_index >= config->gpc_count) {
|
||||
return 0;
|
||||
}
|
||||
return config->gpc_skip_mask[gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 pes_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
|
||||
return config->pes_tpc_mask[pes_index][gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_mask;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->no_of_sm;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_no_of_sm(struct nvgpu_gr_config *config, u32 no_of_sm)
|
||||
{
|
||||
config->no_of_sm = no_of_sm;
|
||||
}
|
||||
|
||||
struct nvgpu_sm_info *nvgpu_gr_config_get_sm_info(struct nvgpu_gr_config *config,
|
||||
u32 sm_id)
|
||||
{
|
||||
return &config->sm_to_cluster[sm_id];
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
struct nvgpu_sm_info *nvgpu_gr_config_get_redex_sm_info(
|
||||
struct nvgpu_gr_config *config, u32 sm_id)
|
||||
{
|
||||
return &config->sm_to_cluster_redex_config[sm_id];
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_gpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->gpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_gpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 gpc_index)
|
||||
{
|
||||
sm_info->gpc_index = gpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_tpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->tpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_tpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 tpc_index)
|
||||
{
|
||||
sm_info->tpc_index = tpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->global_tpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 global_tpc_index)
|
||||
{
|
||||
sm_info->global_tpc_index = global_tpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_sm_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->sm_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_sm_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 sm_index)
|
||||
{
|
||||
sm_info->sm_index = sm_index;
|
||||
}
|
172
drivers/gpu/nvgpu/common/gr/gr_config_priv.h
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_CONFIG_PRIV_H
|
||||
#define NVGPU_GR_CONFIG_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
/**
|
||||
* Max possible PES count per GPC.
|
||||
*/
|
||||
#define GK20A_GR_MAX_PES_PER_GPC 3U
|
||||
|
||||
struct gk20a;
|
||||
|
||||
/**
|
||||
* Detailed information of SM indexes in GR engine.
|
||||
*/
|
||||
struct nvgpu_sm_info {
|
||||
/**
|
||||
* Index of GPC for SM.
|
||||
*/
|
||||
u32 gpc_index;
|
||||
|
||||
/**
|
||||
* Index of TPC for SM.
|
||||
*/
|
||||
u32 tpc_index;
|
||||
|
||||
/**
|
||||
* Index of SM within TPC.
|
||||
*/
|
||||
u32 sm_index;
|
||||
|
||||
/**
|
||||
* Global TPC index for SM.
|
||||
*/
|
||||
u32 global_tpc_index;
|
||||
};
|
||||
|
||||
/**
|
||||
* GR engine configuration data.
|
||||
*
|
||||
* This data is populated during GR initialization and referred across
|
||||
* GPU driver through public APIs.
|
||||
*/
|
||||
struct nvgpu_gr_config {
|
||||
/**
|
||||
* Pointer to GPU driver struct.
|
||||
*/
|
||||
struct gk20a *g;
|
||||
|
||||
/**
|
||||
* Max possible number of GPCs in GR engine.
|
||||
*/
|
||||
u32 max_gpc_count;
|
||||
/**
|
||||
* Max possible number of TPCs per GPC in GR engine.
|
||||
*/
|
||||
u32 max_tpc_per_gpc_count;
|
||||
/**
|
||||
* Max possible number of TPCs in GR engine.
|
||||
*/
|
||||
u32 max_tpc_count;
|
||||
|
||||
/**
|
||||
* Number of GPCs in GR engine.
|
||||
*/
|
||||
u32 gpc_count;
|
||||
/**
|
||||
* Number of TPCs in GR engine.
|
||||
*/
|
||||
u32 tpc_count;
|
||||
/**
|
||||
* Number of PPCs in GR engine.
|
||||
*/
|
||||
u32 ppc_count;
|
||||
|
||||
/**
|
||||
* Number of PES per GPC in GR engine.
|
||||
*/
|
||||
u32 pe_count_per_gpc;
|
||||
/**
|
||||
* Number of SMs per TPC in GR engine.
|
||||
*/
|
||||
u32 sm_count_per_tpc;
|
||||
|
||||
/**
|
||||
* Array to hold number of PPC units per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_ppc_count;
|
||||
/**
|
||||
* Array to hold number of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_tpc_count;
|
||||
/**
|
||||
* 2-D array to hold number of TPCs attached to a PES unit
|
||||
* in a GPC.
|
||||
*/
|
||||
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
|
||||
|
||||
/**
|
||||
* Mask of GPCs. A set bit indicates GPC is available, otherwise
|
||||
* it is not available.
|
||||
*/
|
||||
u32 gpc_mask;
|
||||
|
||||
/**
|
||||
* Array to hold mask of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_tpc_mask;
|
||||
/**
|
||||
* 2-D array to hold mask of TPCs attached to a PES unit
|
||||
* in a GPC.
|
||||
*/
|
||||
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
|
||||
/**
|
||||
* Array to hold skip mask of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_skip_mask;
|
||||
|
||||
/**
|
||||
* Number of SMs in GR engine.
|
||||
*/
|
||||
u32 no_of_sm;
|
||||
/**
|
||||
* Pointer to SM information struct.
|
||||
*/
|
||||
struct nvgpu_sm_info *sm_to_cluster;
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
/**
|
||||
* Pointer to redundant execution config SM information struct.
|
||||
* It is valid only if NVGPU_SUPPORT_SM_DIVERSITY support is true.
|
||||
*/
|
||||
struct nvgpu_sm_info *sm_to_cluster_redex_config;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
u32 max_zcull_per_gpc_count;
|
||||
u32 zcb_count;
|
||||
u32 *gpc_zcb_count;
|
||||
|
||||
u8 *map_tiles;
|
||||
u32 map_tile_count;
|
||||
u32 map_row_offset;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_CONFIG_PRIV_H */
|
Some files were not shown because too many files have changed in this diff